Rationale: This script performs differential gene expression (dge) and calculates the upregulated and downregulated genes in lichen samples versus Xanthoria culture samples using edgeR and Sleuth

1. Generate DGE lists with edgeR

Load the libraries

Create counts table from kallisto output which will be used for further dge analysis

counts<-read.delim2("../analysis_and_temp_files/06_meta_mapping/kallisto_mycobiont_only.txt",sep=" ")
metadata<-read.csv2("../analysis_and_temp_files/06_meta_mapping/metadata_shared_with_neha.csv",sep=",")

# Extract locus_tag from target_id (XANPAGTX0501 for nuclear genome and GTX0501mito for mito genome) - OPTIONAL
counts[c("locus_tag","rest")]<-str_split_fixed(counts$target_id,'_',n=2)

# Tabulate the counts
counts_tab<-pivot_wider(counts[c(1,4,6)],names_from=sample,values_from = est_counts)
head(counts_tab)
## # A tibble: 6 × 32
##   target_id   XBC2  XBA1  XSA2_2 XSC1  XSE2  XBE1  XTA2  XSC2  XSA2  XBA2  XBC1 
##   <chr>       <chr> <chr> <chr>  <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr>
## 1 XANPAGTX05… 270   535   593.5  248   335.5 288   484   324.5 429.5 408   324.5
## 2 XANPAGTX05… 194   131   284.5  112   171.5 132   257.5 144   179.5 187   81   
## 3 XANPAGTX05… 191.5 305   264.5  152.5 288   367   332   246.5 187.5 140   367  
## 4 XANPAGTX05… 126   43.5  119.5  60.5  40.5  34.5  87.5  45    73    258.5 43.5 
## 5 XANPAGTX05… 38    223   221    91    38    20    267   98    179   196   32   
## 6 XANPAGTX05… 205   431   487    176   180   86    332   271   320   356   114  
## # ℹ 20 more variables: XTA1 <chr>, XSA1 <chr>, MP_I <chr>, XTC2 <chr>,
## #   XTE2 <chr>, XBE2 <chr>, XMC2 <chr>, MP_II <chr>, S_21XB1 <chr>,
## #   KS21XB1 <chr>, S_21XB3 <chr>, S_42XB1 <chr>, S_42XB2 <chr>, S_42XB3 <chr>,
## #   KS48XB1 <chr>, KS48XB2 <chr>, KS48XB3 <chr>, KS9XB1 <chr>, KS9XB2 <chr>,
## #   KS9XB3 <chr>
write.table(counts_tab,"../analysis_and_temp_files/08_dge_culture_lichen/counts_table.tsv",quote=F,sep="\t",row.names = F) # Final table that will be used for dge calculation

Read the counts table (or Start running the script from this step)

full<-read.delim("../analysis_and_temp_files/08_dge_culture_lichen/counts_table.tsv",row.names=1)
d<-full %>% select(-MP_I,-MP_II) #remove MP_I and MP_II samples (based on previous correlation heatmaps)
colnames(d)
##  [1] "XBC2"    "XBA1"    "XSA2_2"  "XSC1"    "XSE2"    "XBE1"    "XTA2"   
##  [8] "XSC2"    "XSA2"    "XBA2"    "XBC1"    "XTA1"    "XSA1"    "XTC2"   
## [15] "XTE2"    "XBE2"    "XMC2"    "S_21XB1" "KS21XB1" "S_21XB3" "S_42XB1"
## [22] "S_42XB2" "S_42XB3" "KS48XB1" "KS48XB2" "KS48XB3" "KS9XB1"  "KS9XB2" 
## [29] "KS9XB3"
head(d,5:5)
##                         XBC2  XBA1 XSA2_2  XSC1  XSE2  XBE1  XTA2  XSC2  XSA2
## XANPAGTX0501_000001-T1 270.0 535.0  593.5 248.0 335.5 288.0 484.0 324.5 429.5
## XANPAGTX0501_000002-T1 194.0 131.0  284.5 112.0 171.5 132.0 257.5 144.0 179.5
## XANPAGTX0501_000003-T1 191.5 305.0  264.5 152.5 288.0 367.0 332.0 246.5 187.5
## XANPAGTX0501_000004-T1 126.0  43.5  119.5  60.5  40.5  34.5  87.5  45.0  73.0
## XANPAGTX0501_000005-T1  38.0 223.0  221.0  91.0  38.0  20.0 267.0  98.0 179.0
##                         XBA2  XBC1  XTA1 XSA1  XTC2  XTE2  XBE2  XMC2 S_21XB1
## XANPAGTX0501_000001-T1 408.0 324.5 467.5  262 364.5 292.5 164.5 353.0   700.0
## XANPAGTX0501_000002-T1 187.0  81.0 178.5  163 199.0 183.0  99.0 113.0    66.5
## XANPAGTX0501_000003-T1 140.0 367.0 414.5  134 366.5 319.0 164.5 364.5   151.5
## XANPAGTX0501_000004-T1 258.5  43.5  59.5   74  53.0  29.5  47.0  27.5    11.0
## XANPAGTX0501_000005-T1 196.0  32.0 390.0  196 122.0 100.0  13.0  41.0    22.0
##                        KS21XB1 S_21XB3 S_42XB1 S_42XB2 S_42XB3 KS48XB1 KS48XB2
## XANPAGTX0501_000001-T1   302.5   716.0   452.5   333.0   356.5   454.5   462.5
## XANPAGTX0501_000002-T1   121.5    48.5   126.5    67.0    73.0   221.0   176.5
## XANPAGTX0501_000003-T1   216.0   142.0   206.5   202.5   217.5   197.5   214.0
## XANPAGTX0501_000004-T1   212.0    43.5    68.5    97.5    79.5    43.5    45.5
## XANPAGTX0501_000005-T1    17.0    22.0    27.0    19.0    56.0    16.0    17.0
##                        KS48XB3 KS9XB1 KS9XB2 KS9XB3
## XANPAGTX0501_000001-T1   464.0  376.0  297.0  323.5
## XANPAGTX0501_000002-T1   236.5   98.0   90.0  106.0
## XANPAGTX0501_000003-T1   209.0  313.5  216.5  268.5
## XANPAGTX0501_000004-T1    37.5   91.5   36.0   78.5
## XANPAGTX0501_000005-T1     6.0   24.0   21.0   21.0

Remove lowly expressed genes

# create new column, "max" for the max read count in each row
d$max<-apply(d,1,max)
min(d$max)
## [1] 0
d1<-d[which(d$max>=5),] #to remove lowly expressed genes, keep rows in which max read count across whole panel is atleast 5

# drop the max column, as its not needed anymore
raw_counts<-d1 %>% select(-max)
length(rownames(d1)) # total number of genes
## [1] 11105
length(rownames(raw_counts)) # number after removal of lowly expressed genes
## [1] 11105

Calculation of differentially expressed genes

dge <- DGEList(counts=raw_counts)
dge <- calcNormFactors(dge) # perform scale normalization, adjusting for differences 

#group as replicates together
colnames(raw_counts)
##  [1] "XBC2"    "XBA1"    "XSA2_2"  "XSC1"    "XSE2"    "XBE1"    "XTA2"   
##  [8] "XSC2"    "XSA2"    "XBA2"    "XBC1"    "XTA1"    "XSA1"    "XTC2"   
## [15] "XTE2"    "XBE2"    "XMC2"    "S_21XB1" "KS21XB1" "S_21XB3" "S_42XB1"
## [22] "S_42XB2" "S_42XB3" "KS48XB1" "KS48XB2" "KS48XB3" "KS9XB1"  "KS9XB2" 
## [29] "KS9XB3"
group=c(rep("1",17),rep("2",12)) # 17 samples for lichen and 12 for Xanthoria culture
design=model.matrix(~0+group)
colnames(design)=gsub("","",colnames(design))
head(design)
##   group1 group2
## 1      1      0
## 2      1      0
## 3      1      0
## 4      1      0
## 5      1      0
## 6      1      0
v=voom(dge,design,plot=TRUE,normalize.method="none")

head(v)
## An object of class "EList"
## $targets
##        group lib.size norm.factors
## XBC2       1 17504370     0.982611
## XBA1       1 16580337     1.072634
## XSA2_2     1 21037203     1.066389
## XSC1       1 13863356     1.002658
## XSE2       1 17230419     1.025860
## 24 more rows ...
## 
## $E
##                            XBC2     XBA1   XSA2_2     XSC1     XSE2      XBE1
## XANPAGTX0501_000001-T1 3.949841 5.013341 4.819448 4.163897 4.285431 4.2142638
## XANPAGTX0501_000002-T1 3.473983 2.987517 3.759947 3.020576 3.319379 3.0916848
## XANPAGTX0501_000003-T1 3.455319 4.203627 3.654977 3.464183 4.065542 4.5634367
## XANPAGTX0501_000004-T1 2.853350 1.408030 2.512019 2.137533 1.250666 1.1711193
## XANPAGTX0501_000005-T1 1.137143 3.752729 3.396292 2.722495 1.159901 0.3993883
## XANPAGTX0501_000006-T1 3.553351 4.701815 4.534387 3.670320 3.388969 2.4764645
##                            XTA2     XSC2     XSA2     XBA2     XBC1     XTA1
## XANPAGTX0501_000001-T1 4.697926 4.292915 4.911517 4.723220 4.547046 4.767669
## XANPAGTX0501_000002-T1 3.788800 3.123545 3.655178 3.599775 2.551478 3.381120
## XANPAGTX0501_000003-T1 4.154783 3.896987 3.717913 3.183454 4.724350 4.594272
## XANPAGTX0501_000004-T1 2.237004 1.456414 2.362997 4.065836 1.662182 1.804195
## XANPAGTX0501_000005-T1 3.840968 2.570671 3.651165 3.667413 1.225118 4.506483
## XANPAGTX0501_000006-T1 4.154783 4.033428 4.487505 4.526786 3.041954 4.633706
##                            XSA1     XTC2     XTE2       XBE2      XMC2
## XANPAGTX0501_000001-T1 4.343970 4.458966 4.349140 3.63444726 4.6036306
## XANPAGTX0501_000002-T1 3.660943 3.587459 3.674020 2.90474967 2.9646127
## XANPAGTX0501_000003-T1 3.379259 4.466850 4.474055 3.63444726 4.6498169
## XANPAGTX0501_000004-T1 2.526965 1.688681 1.061274 1.83798066 0.9454191
## XANPAGTX0501_000005-T1 3.926182 2.883852 2.805435 0.02301255 1.5131036
## XANPAGTX0501_000006-T1 4.223676 3.783264 3.453591 3.16899185 3.2724905
##                           S_21XB1    KS21XB1   S_21XB3   S_42XB1  S_42XB2
## XANPAGTX0501_000001-T1  5.3698625  3.9723515 5.3144321 4.6923287 4.400417
## XANPAGTX0501_000002-T1  1.9837104  2.6599148 1.4443190 2.8576461 2.095689
## XANPAGTX0501_000003-T1  3.1655488  3.4874007 2.9844273 3.5624484 3.684210
## XANPAGTX0501_000004-T1 -0.5588168  3.4604965 1.2890408 1.9774859 2.633584
## XANPAGTX0501_000005-T1  0.4094743 -0.1415395 0.3214623 0.6503211 0.304276
## XANPAGTX0501_000006-T1  2.7055238  2.6179208 2.6813582 2.9188100 2.626204
##                         S_42XB3    KS48XB1   KS48XB2    KS48XB3    KS9XB1
## XANPAGTX0501_000001-T1 4.353708  4.5581293  4.641683  4.6067886 4.2674127
## XANPAGTX0501_000002-T1 2.073601  3.5195694  3.254421  3.6359970 2.3329584
## XANPAGTX0501_000003-T1 3.642113  3.3577632  3.531649  3.4580602 4.0055273
## XANPAGTX0501_000004-T1 2.195856  1.1878381  1.310377  0.9951813 2.2344686
## XANPAGTX0501_000005-T1 1.694107 -0.2271993 -0.083902 -1.5523065 0.3256164
## XANPAGTX0501_000006-T1 2.608638  2.9231634  2.746817  2.7302474 2.8141944
##                           KS9XB2    KS9XB3
## XANPAGTX0501_000001-T1 4.2245098 4.0729960
## XANPAGTX0501_000002-T1 2.5076099 2.4678556
## XANPAGTX0501_000003-T1 3.7693152 3.8046083
## XANPAGTX0501_000004-T1 1.1975885 2.0369267
## XANPAGTX0501_000005-T1 0.4340287 0.1594107
## XANPAGTX0501_000006-T1 3.1005211 3.1211633
## 
## $weights
##          [,1]     [,2]     [,3]     [,4]     [,5]     [,6]     [,7]     [,8]
## [1,] 4.570805 4.407244 5.170641 3.903719 4.522890 4.218450 4.772853 4.407200
## [2,] 2.639134 2.534159 3.019856 2.218091 2.608050 2.415787 2.766852 2.534132
## [3,] 3.716200 3.579559 4.210421 3.158393 3.675795 3.423309 3.882575 3.579524
## [4,] 1.300018 1.252259 1.481286 1.111546 1.285817 1.198589 1.359986 1.252247
## [5,] 1.756152 1.687120 2.015189 1.480036 1.735715 1.608629 1.842407 1.687102
## [6,] 3.397406 3.269922 3.856297 2.874701 3.359695 3.123404 3.552558 3.269889
##          [,9]    [,10]    [,11]    [,12]    [,13]    [,14]    [,15]    [,16]
## [1,] 3.984487 4.204313 3.911148 4.514136 3.722192 4.410126 4.001358 3.793217
## [2,] 2.268205 2.406889 2.222695 2.602376 2.104255 2.535971 2.278688 2.148339
## [3,] 3.226074 3.411221 3.164889 3.668415 3.001478 3.581922 3.240097 3.061923
## [4,] 1.133557 1.194570 1.113571 1.283223 1.062766 1.253078 1.138152 1.081391
## [5,] 1.512720 1.602744 1.483040 1.731985 1.407440 1.688312 1.519543 1.435412
## [6,] 2.938432 3.112056 2.880719 3.352809 2.729426 3.272126 2.951570 2.785368
##         [,17]     [,18]     [,19]     [,20]     [,21]     [,22]     [,23]
## [1,] 4.032381 4.6453869 5.0706178 4.8407064 4.7521303 4.4319365 4.7410953
## [2,] 2.297977 1.7400493 1.9180691 1.8210324 1.7842532 1.6522669 1.7796774
## [3,] 3.265889 2.8705389 3.1563260 3.0003947 2.9419290 2.7251756 2.9346514
## [4,] 1.146598 1.1177753 1.2189049 1.1632269 1.1426243 1.0684088 1.1400565
## [5,] 1.532071 0.6361116 0.6765939 0.6544587 0.6461746 0.6162353 0.6451384
## [6,] 2.975740 1.9602083 2.1622770 2.0519603 2.0106587 1.8597449 2.0055200
##          [,24]     [,25]     [,26]     [,27]     [,28]    [,29]
## [1,] 5.0724342 4.9370964 5.0282623 5.1138836 4.4552933 5.061281
## [2,] 1.9188386 1.8616209 1.9001383 1.9364084 1.6617183 1.914115
## [3,] 3.1575617 3.0656324 3.1275260 3.1849705 2.7406688 3.149975
## [4,] 1.2193459 1.1864903 1.2086211 1.2294107 1.0735971 1.216638
## [5,] 0.6767716 0.6636186 0.6724436 0.6808214 0.6183553 0.675680
## [6,] 2.1631522 2.0980854 2.1418842 2.1831366 1.8704487 2.157779
## 
## $design
##   group1 group2
## 1      1      0
## 2      1      0
## 3      1      0
## 4      1      0
## 5      1      0
## 24 more rows ...
#MDS plot
cols<-c(rep("darkgreen",17),rep("tan",12)) # green for lichen and brown for xanthoria samples
plotMDS(v, labels = colnames(v),main="min5readcounts_in_any_sample",cex=0.75,col=cols)

#for calculation of DEGs
fit<-lmFit(v,design) # fits row-wise linear models (this will need the voom output file and the design matrix)
fit
## An object of class "MArrayLM"
## $coefficients
##                          group1    group2
## XANPAGTX0501_000001-T1 4.468067 4.5350839
## XANPAGTX0501_000002-T1 3.340491 2.5842564
## XANPAGTX0501_000003-T1 4.018468 3.5382874
## XANPAGTX0501_000004-T1 1.948324 1.6722398
## XANPAGTX0501_000005-T1 2.571747 0.1806385
## 11100 more rows ...
## 
## $stdev.unscaled
##                           group1    group2
## XANPAGTX0501_000001-T1 0.1174059 0.1311368
## XANPAGTX0501_000002-T1 0.1550407 0.2137446
## XANPAGTX0501_000003-T1 0.1303573 0.1665387
## XANPAGTX0501_000004-T1 0.2200666 0.2674036
## XANPAGTX0501_000005-T1 0.1899151 0.3566337
## 11100 more rows ...
## 
## $sigma
## [1] 0.8334902 0.7162065 0.7798425 0.9565603 1.3891010
## 11100 more elements ...
## 
## $df.residual
## [1] 27 27 27 27 27
## 11100 more elements ...
## 
## $cov.coefficients
##            group1     group2
## group1 0.05882353 0.00000000
## group2 0.00000000 0.08333333
## 
## $pivot
## [1] 1 2
## 
## $rank
## [1] 2
## 
## $Amean
## XANPAGTX0501_000001-T1 XANPAGTX0501_000002-T1 XANPAGTX0501_000003-T1 
##               4.491424               3.013036               3.818495 
## XANPAGTX0501_000004-T1 XANPAGTX0501_000005-T1 
##               1.825576               1.568107 
## 11100 more elements ...
## 
## $method
## [1] "ls"
## 
## $design
##   group1 group2
## 1      1      0
## 2      1      0
## 3      1      0
## 4      1      0
## 5      1      0
## 24 more rows ...
#define comparisons as contrast matrices, which are basically AvsB for DEG calculation eg Lichen vs Xanthoria
contrast.matrix= makeContrasts(group1-group2,levels=design) # more comparisons can be provided here, based on the design
fit2= contrasts.fit(fit, contrast.matrix)
fit2= eBayes(fit2) # empirical Bayes statistics for differential expression
fit2
## An object of class "MArrayLM"
## $coefficients
##                         Contrasts
##                          group1 - group2
##   XANPAGTX0501_000001-T1     -0.06701675
##   XANPAGTX0501_000002-T1      0.75623463
##   XANPAGTX0501_000003-T1      0.48018012
##   XANPAGTX0501_000004-T1      0.27608455
##   XANPAGTX0501_000005-T1      2.39110851
## 11100 more rows ...
## 
## $stdev.unscaled
##                         Contrasts
##                          group1 - group2
##   XANPAGTX0501_000001-T1       0.1760142
##   XANPAGTX0501_000002-T1       0.2640537
##   XANPAGTX0501_000003-T1       0.2114903
##   XANPAGTX0501_000004-T1       0.3463149
##   XANPAGTX0501_000005-T1       0.4040486
## 11100 more rows ...
## 
## $sigma
## [1] 0.8334902 0.7162065 0.7798425 0.9565603 1.3891010
## 11100 more elements ...
## 
## $df.residual
## [1] 27 27 27 27 27
## 11100 more elements ...
## 
## $cov.coefficients
##                  Contrasts
## Contrasts         group1 - group2
##   group1 - group2       0.1421569
## 
## $pivot
## [1] 1 2
## 
## $rank
## [1] 2
## 
## $Amean
## XANPAGTX0501_000001-T1 XANPAGTX0501_000002-T1 XANPAGTX0501_000003-T1 
##               4.491424               3.013036               3.818495 
## XANPAGTX0501_000004-T1 XANPAGTX0501_000005-T1 
##               1.825576               1.568107 
## 11100 more elements ...
## 
## $method
## [1] "ls"
## 
## $design
##   group1 group2
## 1      1      0
## 2      1      0
## 3      1      0
## 4      1      0
## 5      1      0
## 24 more rows ...
## 
## $contrasts
##         Contrasts
## Levels   group1 - group2
##   group1               1
##   group2              -1
## 
## $df.prior
## [1] 2.871922
## 
## $s2.prior
## [1] 0.7537697
## 
## $var.prior
## [1] 8.68376
## 
## $proportion
## [1] 0.01
## 
## $s2.post
## [1] 0.7003844 0.5361043 0.6221539 0.8995059 1.8165558
## 11100 more elements ...
## 
## $t
##                         Contrasts
##                          group1 - group2
##   XANPAGTX0501_000001-T1      -0.4549539
##   XANPAGTX0501_000002-T1       3.9114667
##   XANPAGTX0501_000003-T1       2.8784901
##   XANPAGTX0501_000004-T1       0.8405605
##   XANPAGTX0501_000005-T1       4.3907758
## 11100 more rows ...
## 
## $df.total
## [1] 29.87192 29.87192 29.87192 29.87192 29.87192
## 11100 more elements ...
## 
## $p.value
##                         Contrasts
##                          group1 - group2
##   XANPAGTX0501_000001-T1    0.6524317629
##   XANPAGTX0501_000002-T1    0.0004894221
##   XANPAGTX0501_000003-T1    0.0073148659
##   XANPAGTX0501_000004-T1    0.4072722622
##   XANPAGTX0501_000005-T1    0.0001301997
## 11100 more rows ...
## 
## $lods
##                         Contrasts
##                          group1 - group2
##   XANPAGTX0501_000001-T1      -7.3086114
##   XANPAGTX0501_000002-T1      -0.6907942
##   XANPAGTX0501_000003-T1      -3.4750901
##   XANPAGTX0501_000004-T1      -6.3872391
##   XANPAGTX0501_000005-T1       0.9125933
## 11100 more rows ...
## 
## $F
## [1]  0.2069830 15.2995715  8.2857052  0.7065419 19.2789120
## 11100 more elements ...
## 
## $F.p.value
## [1] 0.6524317629 0.0004894221 0.0073148659 0.4072722622 0.0001301997
## 11100 more elements ...

Complete list after dge calculation

# see contrast matrix. in the example, coef=1 means first comparison (group1-group2 above)
complete_table=topTable(fit2, coef=1,number=nrow(v)) #make sure to change "coeff" is changed each time for each comparison
write.table(complete_table,"../analysis_and_temp_files/08_dge_culture_lichen/complete_lich_vs_xan.tsv", append="false", sep = "\t", quote=F)

Volcano plot (dge filter : logFC >= 2, p-val =< 0.05)

plot(complete_table[,"logFC"],-log10(complete_table[,"adj.P.Val"]), main="Volcano_condition_Lich_vs_Xan")
abline(v=c(2,-2),lty=3,col="red",lwd=2)
abline(h=-log10(0.05),lty=3,col="red",lwd=2)

Prelim results:

  • 788 genes upregulated in the lichen compared to 377 in culture

1.2. Exploring DGE

Add functional annotations

funannot<-read.delim2("../../02_long_read_assemblies/analysis_and_temp_files/06_annotate_lecanoro/Annotation_with_OG.txt",sep="\t")

complete_table2<-complete_table %>% rownames_to_column(var = "TranscriptID") %>% 
  left_join(funannot) %>% select(-c(PFAM,InterPro,EggNog,COG,GO.Terms,Secreted,Protease,CAZyme,CDS.transcript,Translation,gDNA,mRNA))
## Joining with `by = join_by(TranscriptID)`
upreg2=complete_table2[complete_table2[,"logFC"]>=2&complete_table2[,"adj.P.Val"]<=0.05,]
downreg2=complete_table2[complete_table2[,"logFC"]<=-2&complete_table2[,"adj.P.Val"]<=0.05,]

Save upregulated or downregulated genes (dge filter : logFC >= 2, p-val =< 0.05)

write.table(upreg2, "../analysis_and_temp_files/08_dge_culture_lichen/upreg_in_lichen_edgeR.txt", append="false", sep = "\t", quote=F, row.names = F)
write.table(downreg2, "../analysis_and_temp_files/08_dge_culture_lichen/upreg_in_culture_edgeR.txt", append="false", sep = "\t", quote=F, row.names = F)

Enrich plots based on GO terms

  • Upregulated in lichen
###make table with GO annotations
go_df <-funannot %>% select(TranscriptID,GO.Terms_new) %>% 
  mutate(GO.Terms_new = strsplit(GO.Terms_new, ",")) %>%
        unnest(GO.Terms_new) %>%
  mutate(GO.Terms=sub(".*? ", "", GO.Terms_new),
         short_term = substr(GO.Terms, 1,40))

go_data <- list(
    term2protein = data.frame(
                        term = go_df$GO.Terms_new,
                        gene = go_df$TranscriptID
                        ),
    term2name = data.frame(
                        term = go_df$GO.Terms_new,
                        name = go_df$short_term
                        ),
    
    universe = unique(as.character(go_df$TranscriptID))
)

### select genes upregulated in lichens and order them by fold change
geneList1 = upreg2$logFC
names(geneList1) = as.character(upreg2$TranscriptID)
geneList1 = sort(geneList1, decreasing = TRUE)
geneList1 <- names(geneList1)

###enrichment analysis
enrich1<-clusterProfiler::enricher(geneList1,
    pAdjustMethod = "none",
    minGSSize = 1,
    maxGSSize = 2000,
    qvalueCutoff = 1,
    universe=go_data$universe,
    TERM2GENE=go_data$term2protein,
    TERM2NAME=go_data$term2name)

enrichplot::dotplot(enrich1,showCategory=40,label_format=40)

  • In many cases, one protein has multiple GO terms, which then can be a bit misleading when we look at the overrepresented GO terms. This plot shows GO terms as graphs, where the thickness of the edge represents how many genes have both of these two GO terms
enrich1_peirwise<-enrichplot::pairwise_termsim(enrich1)
enrichplot::emapplot(enrich1_peirwise)

  • Upregulated in culture
### select genes upregulated in culture and order them by fold change
geneList2 = downreg2$logFC
names(geneList2) = as.character(downreg2$TranscriptID)
geneList2 = sort(geneList2, decreasing = TRUE)
geneList2 <- names(geneList2)

###enrichment analysis
enrich2<-clusterProfiler::enricher(geneList2,
    pAdjustMethod = "none",
    minGSSize = 1,
    maxGSSize = 2000,
    qvalueCutoff = 1,
    universe=go_data$universe,
    TERM2GENE=go_data$term2protein,
    TERM2NAME=go_data$term2name)

enrichplot::dotplot(enrich2,showCategory=40,label_format=40)

  • As a graph
enrich2_peirwise<-enrichplot::pairwise_termsim(enrich2)
enrichplot::emapplot(enrich2_peirwise)

Enrich plots based on InterPro families

  • Upregulated in lichens
###make table with IPS annotations
ips_df <-funannot %>% select(TranscriptID,InterPro_new) %>% 
  mutate(InterPro_new = strsplit(InterPro_new, ", I")) %>%
        unnest(InterPro_new) %>% mutate(InterPro_new=str_replace(InterPro_new,"^PR","IPR")) %>%
  mutate(short_term = substr(InterPro_new, 1,40))


ips_data <- list(
    term2protein = data.frame(
                        term = ips_df$InterPro_new,
                        gene = ips_df$TranscriptID
                        ),
    term2name = data.frame(
                        term = ips_df$InterPro_new,
                        name = ips_df$short_term
                        ),
    
    universe = unique(as.character(ips_df$TranscriptID))
)


###enrichment analysis
enrich3<-clusterProfiler::enricher(geneList1,
    pAdjustMethod = "none",
    minGSSize = 1,
    maxGSSize = 2000,
    qvalueCutoff = 1,
    universe=ips_data$universe,
    TERM2GENE=ips_data$term2protein,
    TERM2NAME=ips_data$term2name)

enrichplot::dotplot(enrich3,showCategory=40,label_format=40)

  • Upregulated in culture
enrich4<-clusterProfiler::enricher(geneList2,
    pAdjustMethod = "none",
    minGSSize = 1,
    maxGSSize = 2000,
    qvalueCutoff = 1,
    universe=ips_data$universe,
    TERM2GENE=ips_data$term2protein,
    TERM2NAME=ips_data$term2name)

enrichplot::dotplot(enrich4,showCategory=40,label_format=40)

Why are the same families shown as enriched in both?

  • Checked Ankyrin domain (IPS036770)
  • Across all transcripts (funannot %>% filter(grepl("036770",InterPro)) %>% nrow()) / (funannot %>% nrow()) have this annotation
  • In lichen-upregulated, it’s (upreg2 %>% filter(grepl("036770",InterPro)) %>% nrow()) / (upreg2 %>% nrow())
  • In culture-upregulated, it’s (funannot %>% filter(grepl("036770",InterPro)) %>% nrow()) / (funannot %>% nrow())
  • Double-checked that lichen-upregulated and culture-upregulated are non-overlapping
upreg2 %>% filter(TranscriptID %in% downreg2$TranscriptID)
##  [1] TranscriptID   logFC          AveExpr        t              P.Value       
##  [6] adj.P.Val      B              GeneID         Feature        Contig        
## [11] Start          Stop           Strand         Name           Product       
## [16] Alias.Synonyms EC_number      BUSCO          Membrane       antiSMASH     
## [21] Notes          ProtID         CAZyme_new     COG_new        EggNog_new    
## [26] GO.Terms_new   InterPro_new   PFAM_new       Protease_new   Secreted_new  
## [31] KO            
## <0 rows> (or 0-length row.names)
downreg2 %>% filter(TranscriptID %in% upreg2$TranscriptID)
##  [1] TranscriptID   logFC          AveExpr        t              P.Value       
##  [6] adj.P.Val      B              GeneID         Feature        Contig        
## [11] Start          Stop           Strand         Name           Product       
## [16] Alias.Synonyms EC_number      BUSCO          Membrane       antiSMASH     
## [21] Notes          ProtID         CAZyme_new     COG_new        EggNog_new    
## [26] GO.Terms_new   InterPro_new   PFAM_new       Protease_new   Secreted_new  
## [31] KO            
## <0 rows> (or 0-length row.names)

1.3. Focusing on predicted effectors

  • Let’s investigate the expression pattern of the proteins selected as potenital effectors in ../10_lichen_effectors
eff_list<-read.delim2("../../10_lichen_effectors/analysis_and_temp_files/02_characterize_predicted/annotation_predicted_effectors.txt")
eff_list$TranscriptID<-str_replace(eff_list$TranscriptID,"FUN","XANPAGTX0501")
  • of 91 potential effectors, 22 are upregulated in lichen
eff_list %>% filter(TranscriptID %in% upreg2$TranscriptID) %>%
  kable(format = "html", col.names = colnames(eff_list )) %>%
  kable_styling() %>%
  kableExtra::scroll_box(width = "100%", height = "600px")
TranscriptID Product EC_number PFAM_new InterPro_new EggNog_new COG_new GO.Terms_new Protease_new CAZyme_new antiSMASH KO
XANPAGTX0501_000656-T1 hypothetical protein IPR000772 Ricin B, lectin domain, IPR035992 Ricin B-like lectins
XANPAGTX0501_001015-T1 hypothetical protein PF00967, PF03330 IPR001153 Barwin domain, IPR009009 RlpA-like protein, double-psi beta-barrel domain, IPR036908 RlpA-like domain superfamily ENOG503P58T, ENOG503P74E, ENOG503P76M S:(S) Function unknown GO_process: GO:0042742 - defense response to bacterium [Evidence IEA], GO_process: GO:0050832 - defense response to fungus [Evidence IEA]
XANPAGTX0501_001063-T1 hypothetical protein Cluster_1
XANPAGTX0501_001856-T1 hypothetical protein IPR037176 Osmotin/thaumatin-like superfamily ENOG503P50I
XANPAGTX0501_002409-T1 hypothetical protein IPR001338 Hydrophobin GO_component: GO:0009277 - fungal-type cell wall [Evidence IEA], GO_function: GO:0005199 - structural constituent of cell wall [Evidence IEA]
XANPAGTX0501_003287-T1 hypothetical protein Cluster_4
XANPAGTX0501_005364-T1 hypothetical protein
XANPAGTX0501_005573-T1 hypothetical protein
XANPAGTX0501_005675-T1 hypothetical protein PF05572, PF13688 IPR008754 Peptidase M43, pregnancy-associated plasma-A, IPR024079 Metallopeptidase, catalytic domain superfamily ENOG503NZAM O:(O) Posttranslational modification, protein turnover, chaperones GO_function: GO:0008237 - metallopeptidase activity [Evidence IEA] M43B
XANPAGTX0501_006142-T1 hypothetical protein
XANPAGTX0501_006194-T1 hypothetical protein 3.2.1.78 IPR036908 RlpA-like domain superfamily ENOG503P7ES O:(O) Posttranslational modification, protein turnover, chaperones
XANPAGTX0501_006245-T1 hypothetical protein
XANPAGTX0501_006291-T1 hypothetical protein Cluster_1
XANPAGTX0501_006967-T1 hypothetical protein
XANPAGTX0501_008875-T1 hypothetical protein
XANPAGTX0501_009026-T1 hypothetical protein
XANPAGTX0501_009189-T1 hypothetical protein
XANPAGTX0501_009606-T1 hypothetical protein IPR011024 Gamma-crystallin-like ENOG503PMNB
XANPAGTX0501_009688-T1 hypothetical protein PF00734 IPR000254 Cellulose-binding domain, fungal, IPR035971 Cellulose-binding domain superfamily, IPR036908 RlpA-like domain superfamily GO_component: GO:0005576 - extracellular region [Evidence IEA], GO_function: GO:0030248 - cellulose binding [Evidence IEA], GO_process: GO:0005975 - carbohydrate metabolic process [Evidence IEA]
XANPAGTX0501_009886-T1 hypothetical protein
XANPAGTX0501_010250-T1 hypothetical protein ENOG503P3M4
XANPAGTX0501_010470-T1 hypothetical protein 3.2.1.17 PF00959 IPR002196 Glycoside hydrolase, family 24, IPR009045 Hedgehog signalling/DD-peptidase zinc-binding domain superfamily, IPR023346 Lysozyme-like domain superfamily, IPR033907 Endolysin/autolysin, IPR034690 Endolysin T4 type ENOG503P2E0, ENOG503P2RE O:(O) Posttranslational modification, protein turnover, chaperones GO_function: GO:0003796 - lysozyme activity [Evidence IEA], GO_process: GO:0009253 - peptidoglycan catabolic process [Evidence IEA], GO_process: GO:0016998 - cell wall macromolecule catabolic process [Evidence IEA], GO_process: GO:0019076 - viral release from host cell [Evidence IEA], GO_process: GO:0019835 - cytolysis [Evidence IEA] GH24 Cluster_1 K01185
  • And 6 are upregulated in culture
eff_list %>% filter(TranscriptID %in% downreg2$TranscriptID) %>%
  kable(format = "html", col.names = colnames(eff_list)) %>%
  kable_styling() %>%
  kableExtra::scroll_box(width = "100%", height = "600px")
TranscriptID Product EC_number PFAM_new InterPro_new EggNog_new COG_new GO.Terms_new Protease_new CAZyme_new antiSMASH KO
XANPAGTX0501_000850-T1 hypothetical protein ENOG503P7C3
XANPAGTX0501_005118-T1 hypothetical protein ENOG503PWV2, ENOG503PYSR Cluster_1
XANPAGTX0501_006848-T1 hypothetical protein Cluster_1
XANPAGTX0501_009170-T1 hypothetical protein PF02015 IPR000334 Glycoside hydrolase, family 45, IPR036908 RlpA-like domain superfamily ENOG503P08U, ENOG503Q3X3 G:(G) Carbohydrate transport and metabolism GO_function: GO:0008810 - cellulase activity [Evidence IEA], GO_process: GO:0005975 - carbohydrate metabolic process [Evidence IEA] GH45
XANPAGTX0501_010442-T1 hypothetical protein
XANPAGTX0501_010474-T1 hypothetical protein IPR008979 Galactose-binding-like domain superfamily Cluster_1

2. Generate DGE list with Sleuth

library(sleuth)
kal_dirs <- data.frame("sample"=metadata$run_id, "condition"=metadata$sample_type,
                       "path"=paste0("../analysis_and_temp_files/06_meta_mapping/kallisto_meta_mapping/",metadata$run_id,"_kallisto")) %>% filter(sample!="MP_II" & sample!="MP_I")

#define list of genes to be included: only mycobiont and only those that have  read count across whole panel of >=5
target_id<-rownames(d1)
so <- sleuth_prep(kal_dirs, extra_bootstrap_summary = TRUE,filter_target_id = target_id,transformation_function = function(x) log2(x + 0.1))
## Warning in check_num_cores(num_cores): It appears that you are running Sleuth from within Rstudio.
## Because of concerns with forking processes from a GUI, 'num_cores' is being set to 1.
## If you wish to take advantage of multiple cores, please consider running sleuth from the command line.
## reading in kallisto results
## dropping unused factor levels
## .............................
## normalizing est_counts
## A list of target IDs for filtering was found. Using this for filtering
## 11105 targets passed the filter
## normalizing tpm
## merging in metadata
## summarizing bootstraps
## .............................
so <- sleuth_fit(so, ~condition, 'full')
## fitting measurement error models
## shrinkage estimation
## 16 NA values were found during variance shrinkage estimation due to mean observation values outside of the range used for the LOESS fit.
## The LOESS fit will be repeated using exact computation of the fitted surface to extrapolate the missing values.
## These are the target ids with NA values: XANPAGTX0501_000408-T1, XANPAGTX0501_000967-T1, XANPAGTX0501_000968-T1, XANPAGTX0501_000969-T1, XANPAGTX0501_000980-T1, XANPAGTX0501_000995-T1, XANPAGTX0501_002621-T1, XANPAGTX0501_002938-T1, XANPAGTX0501_004237-T1, XANPAGTX0501_005100-T1, XANPAGTX0501_008798-T1, XANPAGTX0501_009179-T1, XANPAGTX0501_010285-T1, XANPAGTX0501_010514-T1, XANPAGTX0501_010642-T1, XANPAGTX0501_010694-T1
## computing variance of betas
so <- sleuth_fit(so, ~1, 'reduced')
## fitting measurement error models
## shrinkage estimation
## 16 NA values were found during variance shrinkage estimation due to mean observation values outside of the range used for the LOESS fit.
## The LOESS fit will be repeated using exact computation of the fitted surface to extrapolate the missing values.
## These are the target ids with NA values: XANPAGTX0501_000408-T1, XANPAGTX0501_000967-T1, XANPAGTX0501_000968-T1, XANPAGTX0501_000969-T1, XANPAGTX0501_000980-T1, XANPAGTX0501_000995-T1, XANPAGTX0501_002621-T1, XANPAGTX0501_002938-T1, XANPAGTX0501_004237-T1, XANPAGTX0501_005100-T1, XANPAGTX0501_008798-T1, XANPAGTX0501_009179-T1, XANPAGTX0501_010285-T1, XANPAGTX0501_010514-T1, XANPAGTX0501_010642-T1, XANPAGTX0501_010694-T1
## computing variance of betas
so <- sleuth_lrt(so, 'reduced', 'full')
so <- sleuth_wt(so, 'conditionxanthoria_culture')
plot_pca(so, color_by = 'condition', text_labels = TRUE,use_filtered=T)

sleuth_table <- sleuth_results(so, 'conditionxanthoria_culture')
sig <- sleuth_table %>% tibble::as_tibble() %>% 
  filter( qval <= 0.05, grepl("GTX0501",target_id) ) %>% 
  arrange(desc(b)) %>% mutate(change = if_else(b > 1, "culture", ifelse(b< -1, "lichen", "low_logFC")))

table(sig$change)
## 
##   culture    lichen low_logFC 
##       564      1185      4124
pca<-plot_pca(so, color_by = 'condition', text_labels = F,use_filtered=T)
pca+theme_bw()+theme(text=element_text(size=7))

ggsave('../results/lichen_culture_pca.pdf',width = 4.5, height = 3)
plot_pc_variance(so)

2.3. Save gene lists

sig2<-sig %>% left_join(funannot, by=c("target_id"="TranscriptID"))

upreg_sl=sig2 %>% filter(change=="lichen") %>% select(-c(CDS.transcript,Translation,gDNA,mRNA))
downreg_sl=sig2 %>% filter(change=="culture")%>% select(-c(CDS.transcript,Translation,gDNA,mRNA))

write.table(upreg_sl, "../analysis_and_temp_files/08_dge_culture_lichen/upreg_in_lichen_sleuth.txt", append="false", sep = "\t", quote=F, row.names = F)
write.table(downreg_sl, "../analysis_and_temp_files/08_dge_culture_lichen/upreg_in_culture_sleuth.txt", append="false", sep = "\t", quote=F, row.names = F)

tabd_df <- so$obs_norm[so$obs_norm$target_id %in% target_id,]
tabd_df <- dplyr::select(tabd_df, target_id, sample, 
            tpm)
tabd_df <- reshape2::dcast(tabd_df, target_id ~ sample, 
            value.var = "tpm") 
write.table(tabd_df, "../analysis_and_temp_files/08_dge_culture_lichen/norm_counts_sleuth.txt", append="false", sep = "\t", quote=F, row.names = F)

2.3. Let’s see how consistent are the lists of DGE between edgeR and sleuth

  • Upregulated in lichens: the entire edgeR set is included in the sleuth set
library(ggVennDiagram)
## 
## Attaching package: 'ggVennDiagram'
## The following object is masked from 'package:tidyr':
## 
##     unite
venn_lichen<-list(egdeR = upreg2$TranscriptID,
               sleuth = sig$target_id[sig$change=="lichen"])
l<-ggVennDiagram(venn_lichen,label_size = 5,set_size=5)+labs(title = "Upregulated in lichen")+theme(title=element_text(size=12))+scale_x_continuous(expand = expansion(mult = .4))
l

  • When plotting edgeR results in relation to the sleuth b-values, there is a clear trend, but no clear cut-off in b-value corresponding to the logFC>2 cut-off in edgeR
upreg_viz<-sig %>% filter(b<0) %>% 
  mutate(edgeR = ifelse(target_id %in% upreg2$TranscriptID,"upregulated_in_edgeR","not_upregulated_in_edgeR"))
ggplot(upreg_viz)+geom_density(aes(x=b,fill=edgeR))+facet_wrap(.~edgeR,nrow = 2)

  • For the genes upregulated in culture, edgeR set had 30 genes (of 377) which weren’t included in the sleuth set
venn_cult<-list(egdeR = downreg2$TranscriptID,
               sleuth = sig$target_id[sig$change=="culture"])
c<-ggVennDiagram(venn_cult,label_size = 5,set_size=5)+labs(title = "Upregulated in culture")+theme(title=element_text(size=12))+scale_x_continuous(expand = expansion(mult = .4))
c

  • Comparing edgeR and b-values looks as above
down_viz<-sig %>% filter(b>0) %>% 
  mutate(edgeR = ifelse(target_id %in% downreg2$TranscriptID,"upregulated_in_edgeR","not_upregulated_in_edgeR"))

ggplot(down_viz)+geom_density(aes(x=b,fill=edgeR))+facet_wrap(.~edgeR,nrow = 2)

2.4. bootstrap of several genes

  • Picked the gene with maximal b-value (log2 fold changes between conditions). Here, b-value > 0 means overrexpression in culture
plot_bootstrap(so, 
               target_id = sig$target_id[which.max(sig$b)], 
               units = "est_counts", 
               color_by = "condition")

  • Picked the gene with minimal b-value, i.e. most strongly overrexpressed in lichen
plot_bootstrap(so, 
               target_id = sig$target_id[which.min(sig$b)], 
               units = "est_counts", 
               color_by = "condition")

  • Picked the gene with minimal absolute b-value, i.e. with the lowest fold change difference. The b for it is sig$b[which.min(abs(sig$b))] and the q-value (p-value adjusted for multiple test correction) is sig$qval[which.min(abs(sig$b))], but you can see the difference between sample types somewhat
plot_bootstrap(so, 
               target_id = sig$target_id[which.min(abs(sig$b))], 
               units = "est_counts", 
               color_by = "condition")

2.5. GO enrichment analysis on sleuth

### select genes upregulated in lichens and order them by fold change
geneList5 = sig$b[sig$change=="lichen"]
names(geneList5) = sig$target_id[sig$change=="lichen"]
geneList5 = sort(geneList5, decreasing = F)
geneList5 <- names(geneList5)

###enrichment analysis
enrich5<-clusterProfiler::enricher(geneList5,
    pAdjustMethod = "none",
    minGSSize = 1,
    maxGSSize = 2000,
    qvalueCutoff = 1,
    universe=go_data$universe,
    TERM2GENE=go_data$term2protein,
    TERM2NAME=go_data$term2name)

enrichplot::dotplot(enrich5,showCategory=40,label_format=40)

enrich5_pairwise<-enrichplot::pairwise_termsim(enrich5)
enrichplot::emapplot(enrich5_pairwise)

### select genes upregulated in lichens and order them by fold change
geneList6 = sig$b[sig$change=="culture"]
names(geneList6) = sig$target_id[sig$change=="culture"]
geneList6 = sort(geneList6, decreasing = T)
geneList6 <- names(geneList6)

###enrichment analysis
enrich6<-clusterProfiler::enricher(geneList6,
    pAdjustMethod = "none",
    minGSSize = 1,
    maxGSSize = 2000,
    qvalueCutoff = 1,
    universe=go_data$universe,
    TERM2GENE=go_data$term2protein,
    TERM2NAME=go_data$term2name)

enrichplot::dotplot(enrich6,showCategory=40,label_format=40)

enrich6_pairwise<-enrichplot::pairwise_termsim(enrich6)
enrichplot::emapplot(enrich6_pairwise)

2.6. InterPro enrichment analysis on sleuth

enrich7<-clusterProfiler::enricher(geneList5,
    pAdjustMethod = "none",
    minGSSize = 1,
    maxGSSize = 2000,
    qvalueCutoff = 1,
    universe=ips_data$universe,
    TERM2GENE=ips_data$term2protein,
    TERM2NAME=ips_data$term2name)

enrichplot::dotplot(enrich7,showCategory=40,label_format=40)

* Upregulated in lichen as a graph

enrich7_pairwise<-enrichplot::pairwise_termsim(enrich7)
enrichplot::emapplot(enrich7_pairwise)

pdf(file="../results/lichen_upr.pdf",width=5,height=4)
enrichplot::emapplot(enrich7_pairwise,cex_label_category=0.4,cex_line=0.25,
                     shadowtext=F,cex_pie2axis=0.1,repel=T)
dev.off()
## quartz_off_screen 
##                 2
enrich8<-clusterProfiler::enricher(geneList6,
    pAdjustMethod = "none",
    minGSSize = 1,
    maxGSSize = 2000,
    qvalueCutoff = 1,
    universe=ips_data$universe,
    TERM2GENE=ips_data$term2protein,
    TERM2NAME=ips_data$term2name)

enrichplot::dotplot(enrich8,showCategory=40,label_format=40)

* Upregulated in culture as a graph

enrich8_pairwise<-enrichplot::pairwise_termsim(enrich8)
enrichplot::emapplot(enrich8_pairwise)

pdf(file="../results/culture_upr.pdf",width=5,height=4)
enrichplot::emapplot(enrich8_pairwise,cex_label_category=0.4,cex_line=0.25,
                     shadowtext=F,cex_pie2axis=0.1,repel=T)
dev.off()
## quartz_off_screen 
##                 2

2.7. These enrichment plots need to be taken with a grain of salt

perc<-funannot %>% mutate(go_annotated=ifelse(!(is.na(GO.Terms_new)),"Annotated","Non-annotated"),
         ipr_annotated=ifelse(!(is.na(InterPro_new)),"Annotated","Non-annotated"),
         gene_set=case_when(
           TranscriptID %in% sig$target_id[sig$change=="culture"] ~ "Upregulated in culture",
          TranscriptID %in% sig$target_id[sig$change=="lichen"] ~ "Upregulated in lichen",
          T ~ "non-DGE"))

ipr_perc_dge<- perc %>% group_by(gene_set,ipr_annotated) %>% summarize(n=n()) %>% mutate(database="InterPro",annotated=ipr_annotated) %>% select(-ipr_annotated)
## `summarise()` has grouped output by 'gene_set'. You can override using the
## `.groups` argument.
go_perc_dge <- perc %>% group_by(gene_set,go_annotated) %>% summarize(n=n()) %>% mutate(database="GO",annotated=go_annotated) %>% select(-go_annotated)
## `summarise()` has grouped output by 'gene_set'. You can override using the
## `.groups` argument.
ipr_perc_all<- perc %>% group_by(ipr_annotated) %>% summarize(n=n()) %>% mutate(database="InterPro",gene_set="Whole transcriptome",annotated=ipr_annotated) %>% select(-ipr_annotated)
go_perc_all <- perc %>% group_by(go_annotated) %>% summarize(n=n()) %>% mutate(database="GO",gene_set="Whole transcriptome",annotated=go_annotated) %>% select(-go_annotated)
perc2<-rbind(ipr_perc_dge,go_perc_dge,ipr_perc_all,go_perc_all)
perc2$annotated<-factor(perc2$annotated,levels=c("Non-annotated","Annotated"))

ggplot(perc2 %>% filter(gene_set %in% c("Upregulated in culture","Upregulated in lichen")),
       aes(x=database,y=n,fill=annotated))+geom_bar(position="stack",stat="identity")+
  facet_wrap(~gene_set)

* Instead of two datatbases, visualize the number of proteins with any kind of functional annotation * NB: counted tRNA as being annotated

#make bar graph for DGE
perc<-funannot %>% mutate(annotated=ifelse(!(is.na(GO.Terms_new))|                                       !(is.na(CAZyme_new))| !(is.na(PFAM_new))|
                !(is.na(Protease_new))|!(is.na(InterPro_new))| KO!=""|Feature=="tRNA",
                "Annotated","Non-annotated"),
         gene_set=case_when(
           TranscriptID %in% sig$target_id[sig$change=="culture"] ~ "Upregulated in culture",
          TranscriptID %in% sig$target_id[sig$change=="lichen"] ~ "Upregulated in lichen",
          T ~ "non-DGE"))

perc_dge<- perc %>% group_by(gene_set,annotated) %>% summarize(n=n()) %>%
  group_by(gene_set) %>% mutate(perc=(100*n)/sum(n))
## `summarise()` has grouped output by 'gene_set'. You can override using the
## `.groups` argument.
dge_plot<-ggplot(perc_dge %>% filter(gene_set %in% c("Upregulated in culture","Upregulated in lichen")),
       aes(x=gene_set,y=n,fill=annotated))+geom_bar(position="stack",stat="identity")+
  geom_text(aes(label = paste0(round(perc,0),"%"),color=annotated), position=position_stack(vjust = 0.5),size=7)+
  xlab("")+ylab("# of transcripts")+ggtitle("Differentially expressed")+
  scale_fill_manual(values = c("Non-annotated"="#ffda73","Annotated"="#156902"))+
  scale_color_manual(values = c("Non-annotated"="#156902","Annotated"="#ffda73"))+ theme_minimal()+
  theme(legend.title=element_blank(),legend.position = c(0.2, 0.8),
  legend.background = element_rect(fill="white",
                                 linewidth=0.5, linetype="solid", 
                                  colour ="grey"),
  panel.grid.major.x = element_blank(),
  text = element_text(size=12),
  axis.text=element_text(size=8))

#donut plot for the whole genome
perc_all <- perc %>% group_by(annotated) %>% summarize(n=n()) %>% mutate(perc=(100*n)/sum(n))
perc_all$ymax <- cumsum(perc_all$perc)
# Compute the bottom of each rectangle
perc_all$ymin <- c(0, head(perc_all$ymax, n=-1))
# Compute label position
perc_all$labelPosition <- (perc_all$ymax + perc_all$ymin) / 2
# Compute a good label
perc_all$label <- paste0(round(perc_all$perc,0), "%")
# Make the plot
all_plot<-ggplot(perc_all, aes(ymax=ymax, ymin=ymin, xmax=4, xmin=3, fill=annotated)) +
  geom_rect() +
  geom_text( x=3.5, aes(y=labelPosition, label=label,color=annotated), size=8) +
  scale_fill_manual(values = c("Non-annotated"="#ffda73","Annotated"="#156902"))+
  scale_color_manual(values = c("Non-annotated"="#156902","Annotated"="#ffda73"))+
  coord_polar(theta="y") + ggtitle("Whole transcriptome (n = 11,185)")+
  xlim(c(2, 4)) +
  theme_void() +
  theme(legend.position = "none",text = element_text(size=12))

library(patchwork)
dge_plot+all_plot

all_plot<-ggplot(perc_all, aes(ymax=ymax, ymin=ymin, xmax=4, xmin=3, fill=annotated)) +
  geom_rect() +
  geom_text( x=3.5, aes(y=labelPosition, label=label,color=annotated), size=2.75) +
  scale_fill_manual(values = c("Non-annotated"="#ffda73","Annotated"="#156902"))+
  scale_color_manual(values = c("Non-annotated"="#156902","Annotated"="#ffda73"))+
  coord_polar(theta="y") + ggtitle("Whole transcriptome (n = 11,185)")+
  xlim(c(2, 4)) +
  theme_void() +
  theme(legend.position = "none",text = element_text(size=8),plot.title = element_text(size=8))
dge_plot<-ggplot(perc_dge %>% filter(gene_set %in% c("Upregulated in culture","Upregulated in lichen")),
       aes(x=gene_set,y=n,fill=annotated))+geom_bar(position="stack",stat="identity")+
  geom_text(aes(label = paste0(round(perc,0),"%"),color=annotated), position=position_stack(vjust = 0.5),size=2.75)+
  xlab("")+ylab("# of transcripts")+ggtitle("Differentially expressed")+
  scale_fill_manual(values = c("Non-annotated"="#ffda73","Annotated"="#156902"))+
  scale_color_manual(values = c("Non-annotated"="#156902","Annotated"="#ffda73"))+ theme_minimal()+
  theme(legend.title=element_blank(),legend.position = c(0.2, 0.8),
  legend.background = element_rect(fill="white",
                                 linewidth=0.5, linetype="solid", 
                                  colour ="grey"),
  panel.grid.major.x = element_blank(),
  text = element_text(size=8),
  axis.text=element_text(size=6),plot.title = element_text(size=8))

pdf(file="../results/annotated_percent.pdf",width=4.5,height=3.5)
dge_plot+all_plot
dev.off()
## quartz_off_screen 
##                 2

2.8. Differentially expressed effectors

Upregulated in lichens

  • of 91 potential effectors, 27 are upregulated in lichen
eff_list<- eff_list %>%
  left_join(sig, by = c("TranscriptID"="target_id")) 

  eff_list %>% filter(change=="lichen") %>%
  kable(format = "html", col.names = colnames(eff_list)) %>%
  kable_styling() %>%
  kableExtra::scroll_box(width = "100%", height = "600px")
TranscriptID Product EC_number PFAM_new InterPro_new EggNog_new COG_new GO.Terms_new Protease_new CAZyme_new antiSMASH KO pval qval b se_b mean_obs var_obs tech_var sigma_sq smooth_sigma_sq final_sigma_sq change
XANPAGTX0501_000656-T1 hypothetical protein IPR000772 Ricin B, lectin domain, IPR035992 Ricin B-like lectins 0.0000000 0.0000000 -4.357565 0.4814432 5.9017838 6.3427509 0.0164185 1.6140872 0.0998084 1.6140872 lichen
XANPAGTX0501_001015-T1 hypothetical protein PF00967, PF03330 IPR001153 Barwin domain, IPR009009 RlpA-like protein, double-psi beta-barrel domain, IPR036908 RlpA-like domain superfamily ENOG503P58T, ENOG503P74E, ENOG503P76M S:(S) Function unknown GO_process: GO:0042742 - defense response to bacterium [Evidence IEA], GO_process: GO:0050832 - defense response to fungus [Evidence IEA] 0.0000000 0.0000000 -3.506517 0.4643931 5.2846088 4.5519407 0.0122602 1.5048031 0.1598182 1.5048031 lichen
XANPAGTX0501_001063-T1 hypothetical protein Cluster_1 0.0000000 0.0000000 -4.589396 0.3736799 7.2076979 6.2387691 0.0049656 0.9773059 0.0431282 0.9773059 lichen
XANPAGTX0501_001856-T1 hypothetical protein IPR037176 Osmotin/thaumatin-like superfamily ENOG503P50I 0.0000000 0.0000000 -3.823871 0.4267541 5.3759413 4.9088637 0.0242365 1.2568772 0.1485279 1.2568772 lichen
XANPAGTX0501_002041-T1 hypothetical protein 0.0000024 0.0000117 -1.127546 0.2389993 4.1982488 0.4966441 0.0198173 0.1639853 0.3819970 0.3819970 lichen
XANPAGTX0501_002409-T1 hypothetical protein IPR001338 Hydrophobin GO_component: GO:0009277 - fungal-type cell wall [Evidence IEA], GO_function: GO:0005199 - structural constituent of cell wall [Evidence IEA] 0.0000000 0.0000000 -4.088717 0.2304978 9.5981869 4.5603784 0.0003597 0.3733771 0.0994788 0.3733771 lichen
XANPAGTX0501_003186-T1 hypothetical protein Cluster_1 0.0002443 0.0007831 -1.045733 0.2850859 7.2273522 0.8260378 0.0009527 0.5707678 0.0428782 0.5707678 lichen
XANPAGTX0501_003287-T1 hypothetical protein Cluster_4 0.0000000 0.0000000 -1.493602 0.1967691 4.7017340 0.7206409 0.0136889 0.1524257 0.2586729 0.2586729 lichen
XANPAGTX0501_005364-T1 hypothetical protein 0.0000000 0.0000000 -4.400832 0.3864616 6.9122523 5.8787781 0.0054429 1.0451753 0.0488290 1.0451753 lichen
XANPAGTX0501_005573-T1 hypothetical protein 0.0000000 0.0000000 -2.506069 0.2266291 5.6609996 1.9262226 0.0095206 0.3517756 0.1192397 0.3517756 lichen
XANPAGTX0501_005675-T1 hypothetical protein PF05572, PF13688 IPR008754 Peptidase M43, pregnancy-associated plasma-A, IPR024079 Metallopeptidase, catalytic domain superfamily ENOG503NZAM O:(O) Posttranslational modification, protein turnover, chaperones GO_function: GO:0008237 - metallopeptidase activity [Evidence IEA] M43B 0.0000000 0.0000000 -2.825087 0.1615625 8.3760453 2.1821673 0.0005138 0.1831034 0.0515336 0.1831034 lichen
XANPAGTX0501_006142-T1 hypothetical protein 0.0000000 0.0000000 -2.552912 0.1836978 6.0165771 1.8662663 0.0052380 0.2321399 0.0918787 0.2321399 lichen
XANPAGTX0501_006194-T1 hypothetical protein 3.2.1.78 IPR036908 RlpA-like domain superfamily ENOG503P7ES O:(O) Posttranslational modification, protein turnover, chaperones 0.0000000 0.0000000 -1.466337 0.1233940 5.8505478 0.6054853 0.0035018 0.0642183 0.1036059 0.1036059 lichen
XANPAGTX0501_006245-T1 hypothetical protein 0.0000000 0.0000000 -3.929116 0.4012715 4.2238012 4.9707307 0.0580619 1.0746222 0.3750018 1.0746222 lichen
XANPAGTX0501_006291-T1 hypothetical protein Cluster_1 0.0000000 0.0000000 -2.524612 0.4345364 1.9200532 2.4738155 0.1747462 0.7301201 1.1535179 1.1535179 lichen
XANPAGTX0501_006751-T1 hypothetical protein Cluster_4 0.0000000 0.0000000 -1.137824 0.1152815 6.0347959 0.4150017 0.0027983 0.0902720 0.0906888 0.0906888 lichen
XANPAGTX0501_006967-T1 hypothetical protein 0.0000000 0.0000000 -1.686952 0.1629173 6.9741854 0.8949974 0.0013260 0.1853835 0.0471902 0.1853835 lichen
XANPAGTX0501_007280-T1 hypothetical protein 0.0000002 0.0000012 -1.317948 0.2534485 5.7082083 0.8721158 0.0044510 0.4474169 0.1150951 0.4474169 lichen
XANPAGTX0501_008875-T1 hypothetical protein 0.0050513 0.0117058 -1.934027 0.6898007 0.8582434 4.1673621 0.0976534 3.2495299 1.4505443 3.2495299 lichen
XANPAGTX0501_009026-T1 hypothetical protein 0.0000000 0.0000000 -1.512207 0.2285639 5.9477612 0.9288757 0.0034740 0.3640176 0.0965398 0.3640176 lichen
XANPAGTX0501_009189-T1 hypothetical protein 0.0000000 0.0000000 -4.014594 0.3923972 2.3682678 4.8172641 0.0859581 0.7106675 0.9971806 0.9971806 lichen
XANPAGTX0501_009311-T1 hypothetical protein PF05730 IPR008427 Extracellular membrane protein, CFEM domain 0.0000000 0.0000000 -1.019264 0.1582712 7.0425826 0.4309233 0.0010038 0.1752083 0.0457542 0.1752083 lichen
XANPAGTX0501_009606-T1 hypothetical protein IPR011024 Gamma-crystallin-like ENOG503PMNB 0.0009831 0.0027450 -2.529422 0.7675780 4.3477047 5.6039016 0.0639172 4.0806311 0.3420776 4.0806311 lichen
XANPAGTX0501_009688-T1 hypothetical protein PF00734 IPR000254 Cellulose-binding domain, fungal, IPR035971 Cellulose-binding domain superfamily, IPR036908 RlpA-like domain superfamily GO_component: GO:0005576 - extracellular region [Evidence IEA], GO_function: GO:0030248 - cellulose binding [Evidence IEA], GO_process: GO:0005975 - carbohydrate metabolic process [Evidence IEA] 0.0000000 0.0000000 -2.868656 0.1587323 5.5946483 2.2383423 0.0098738 0.1673667 0.1253745 0.1673667 lichen
XANPAGTX0501_009886-T1 hypothetical protein 0.0000000 0.0000000 -2.690801 0.4211666 2.0891440 2.6668573 0.1520313 0.7272087 1.0957544 1.0957544 lichen
XANPAGTX0501_010250-T1 hypothetical protein ENOG503P3M4 0.0000000 0.0000000 -1.956382 0.1621732 5.2525682 1.1399713 0.0084374 0.1765706 0.1640329 0.1765706 lichen
XANPAGTX0501_010470-T1 hypothetical protein 3.2.1.17 PF00959 IPR002196 Glycoside hydrolase, family 24, IPR009045 Hedgehog signalling/DD-peptidase zinc-binding domain superfamily, IPR023346 Lysozyme-like domain superfamily, IPR033907 Endolysin/autolysin, IPR034690 Endolysin T4 type ENOG503P2E0, ENOG503P2RE O:(O) Posttranslational modification, protein turnover, chaperones GO_function: GO:0003796 - lysozyme activity [Evidence IEA], GO_process: GO:0009253 - peptidoglycan catabolic process [Evidence IEA], GO_process: GO:0016998 - cell wall macromolecule catabolic process [Evidence IEA], GO_process: GO:0019076 - viral release from host cell [Evidence IEA], GO_process: GO:0019835 - cytolysis [Evidence IEA] GH24 Cluster_1 K01185 0.0000000 0.0000000 -1.921148 0.2543482 5.5919285 1.3660764 0.0067217 0.4483603 0.1256340 0.4483603 lichen

Visualize expression profiles. Manually checked all graphs, which all look legit

  • some have strong and obvious effect: e.g. XANPAGTX0501_005675-T1
lichen_eff<-eff_list$TranscriptID[eff_list$TranscriptID %in% sig$target_id[sig$change=="lichen"]]
plot_bootstrap(so, 
               target_id = lichen_eff[11], 
               units = "est_counts", 
               color_by = "condition")

  • some have small effect: e.g. XANPAGTX0501_003186-T1
plot_bootstrap(so, 
               target_id = lichen_eff[7], 
               units = "est_counts", 
               color_by = "condition")

  • some have uneven effect between lichen samples
    • XANPAGTX0501_007280-T1 upregulted in bark samples, but not stone samples
    • XANPAGTX0501_009026-T1 upregulted in stone samples more than in bark samples
    • XANPAGTX0501_009606-T1 upregulated in all but 3 samples
plot_bootstrap(so, 
               target_id = lichen_eff[18], 
               units = "est_counts", 
               color_by = "condition")
plot_bootstrap(so, 
               target_id = lichen_eff[20], 
               units = "est_counts", 
               color_by = "condition")
plot_bootstrap(so, 
               target_id = lichen_eff[23], 
               units = "est_counts", 
               color_by = "condition")

* two odd ones: XANPAGTX0501_008875-T1, XANPAGTX0501_006291-T1

plot_bootstrap(so, 
               target_id = lichen_eff[15], 
               units = "est_counts", 
               color_by = "condition")
plot_bootstrap(so, 
               target_id = lichen_eff[19], 
               units = "est_counts", 
               color_by = "condition")

Upregulated in culture

  • And 7 are upregulated in culture
  eff_list %>% filter(change=="culture") %>%
  kable(format = "html", col.names = colnames(eff_list)) %>%
  kable_styling() %>%
  kableExtra::scroll_box(width = "100%", height = "600px")
TranscriptID Product EC_number PFAM_new InterPro_new EggNog_new COG_new GO.Terms_new Protease_new CAZyme_new antiSMASH KO pval qval b se_b mean_obs var_obs tech_var sigma_sq smooth_sigma_sq final_sigma_sq change
XANPAGTX0501_000850-T1 hypothetical protein ENOG503P7C3 0.0000000 0.0000000 2.534878 0.2622001 7.460813 2.0806559 0.0012183 0.4823944 0.0412653 0.4823944 culture
XANPAGTX0501_005118-T1 hypothetical protein ENOG503PWV2, ENOG503PYSR Cluster_1 0.0000000 0.0000000 5.651733 0.6707321 2.530043 11.0765183 0.0109173 3.1537666 0.9389202 3.1537666 culture
XANPAGTX0501_006848-T1 hypothetical protein Cluster_1 0.0000000 0.0000000 3.980717 0.5309641 2.073824 5.8933957 0.0141339 1.9690481 1.1010599 1.9690481 culture
XANPAGTX0501_009170-T1 hypothetical protein PF02015 IPR000334 Glycoside hydrolase, family 45, IPR036908 RlpA-like domain superfamily ENOG503P08U, ENOG503Q3X3 G:(G) Carbohydrate transport and metabolism GO_function: GO:0008810 - cellulase activity [Evidence IEA], GO_process: GO:0005975 - carbohydrate metabolic process [Evidence IEA] GH45 0.0039702 0.0094673 2.253664 0.7823812 5.814142 5.4281710 0.0376799 4.2682699 0.1064086 4.2682699 culture
XANPAGTX0501_010191-T1 hypothetical protein 3.1.1.74 PF01083 IPR000675 Cutinase/acetylxylan esterase, IPR011150 Cutinase, monofunctional, IPR029058 Alpha/Beta hydrolase fold ENOG503P4Q7 G:(G) Carbohydrate transport and metabolism GO_component: GO:0005576 - extracellular region [Evidence IEA], GO_function: GO:0016787 - hydrolase activity [Evidence IEA], GO_function: GO:0050525 - cutinase activity [Evidence IEA] CE5 0.0000009 0.0000045 1.151281 0.2338590 4.966704 0.7039702 0.0094855 0.3752306 0.2077829 0.3752306 culture
XANPAGTX0501_010442-T1 hypothetical protein 0.0005211 0.0015524 1.618450 0.4664592 1.417648 1.6050501 0.2194048 0.7626476 1.3111874 1.3111874 culture
XANPAGTX0501_010474-T1 hypothetical protein IPR008979 Galactose-binding-like domain superfamily Cluster_1 0.0000000 0.0000000 3.724444 0.2985908 7.461490 4.0897241 0.0023959 0.6247736 0.0412642 0.6247736 culture

Expression profiles

  • Out of 7, 3 look okay
culture_eff<-eff_list$TranscriptID[eff_list$TranscriptID %in% sig$target_id[sig$change=="culture"]]
plot_bootstrap(so, 
               target_id = culture_eff[1], 
               units = "est_counts", 
               color_by = "condition")
plot_bootstrap(so, 
               target_id = culture_eff[5], 
               units = "est_counts", 
               color_by = "condition")
plot_bootstrap(so, 
               target_id = culture_eff[7], 
               units = "est_counts", 
               color_by = "condition")

  • weird ones, probably due to the low number of counts across all samples
plot_bootstrap(so, 
               target_id = culture_eff[3], 
               units = "est_counts", 
               color_by = "condition")
plot_bootstrap(so, 
               target_id = culture_eff[2], 
               units = "est_counts", 
               color_by = "condition")
plot_bootstrap(so, 
               target_id = culture_eff[4], 
               units = "est_counts", 
               color_by = "condition")
plot_bootstrap(so, 
               target_id = culture_eff[6], 
               units = "est_counts", 
               color_by = "condition")

3. Create lists for unique genes

Identified 13 genes expressed in all lichen samples and none of the culture samples.

  • This list doesn’t include any potential effectors
lichen_unique<-counts_tab %>% filter(XBC2>0,XBA1>0,XSA2_2>0,XSC1>0,XSE2>0,XBE1>0,XTA2>0,XSC2>0,
  XSA2>0,XBA2>0,XBC1>0,XTA1>0,XSA1>0,XTC2>0,XTE2>0,XBE2>0,XMC2>0,
  S_21XB1==0,KS21XB1==0,S_21XB3==0,S_42XB1==0,S_42XB2==0,S_42XB3==0,
  KS48XB1==0,KS48XB2==0,KS48XB3==0,KS9XB1==0,KS9XB2==0,KS9XB3==0) %>%
  pivot_longer(-target_id,names_to="sample",values_to="counts") %>%
  filter(counts>0, sample !="MP_I",sample !="MP_II") %>% group_by(target_id) %>% summarise(avg_counts_lichen=mean(as.numeric(counts))) %>%  
  left_join(funannot, by=c("target_id"="TranscriptID")) %>%
  select(-c(CDS.transcript,Translation,gDNA,mRNA))
  
lichen_unique %>%
  kable(format = "html", col.names = colnames(lichen_unique)) %>%
  kable_styling() %>%
  kableExtra::scroll_box(width = "100%", height = "600px")
target_id avg_counts_lichen GeneID Feature Contig Start Stop Strand Name Product Alias.Synonyms EC_number BUSCO PFAM InterPro EggNog COG GO.Terms Secreted Membrane Protease CAZyme antiSMASH Notes ProtID CAZyme_new COG_new EggNog_new GO.Terms_new InterPro_new PFAM_new Protease_new Secreted_new KO
XANPAGTX0501_001612-T1 46.99245 XANPAGTX0501_001612 mRNA Xp_GTX0501_2 2002693 2003141
NA hypothetical protein NA NA NA NA NA NA NA NA NA NA NA NA NA NA XANPAGTX0501_001612_T1_XANPAGTX0501_001612 NA NA NA NA NA NA NA NA
XANPAGTX0501_002783-T1 18.68803 XANPAGTX0501_002783 mRNA Xp_GTX0501_4 808460 808865
NA hypothetical protein NA NA NA NA NA NA NA NA NA NA NA NA NA NA XANPAGTX0501_002783_T1_XANPAGTX0501_002783 NA NA NA NA NA NA NA NA
XANPAGTX0501_003199-T1 20.17647 XANPAGTX0501_003199 mRNA Xp_GTX0501_5 87909 89177
AIM6_3 Altered inheritance of mitochondria protein 6 NA NA NA NA IPR017946 PLC-like phosphodiesterase, TIM beta/alpha-barrel domain superfamily;IPR039559 Altered inheritance of mitochondria protein 6, PI-PLC-like catalytic domain ENOG503NZFJ S:(S) Function unknown GO_function: GO:0008081 - phosphoric diester hydrolase activity [Evidence IEA];GO_process: GO:0006629 - lipid metabolic process [Evidence IEA] NA NA NA NA Cluster_2 NA XANPAGTX0501_003199_T1_XANPAGTX0501_003199 NA S:(S) Function unknown ENOG503NZFJ GO_function: GO:0008081 - phosphoric diester hydrolase activity [Evidence IEA], GO_process: GO:0006629 - lipid metabolic process [Evidence IEA] IPR017946 PLC-like phosphodiesterase, TIM beta/alpha-barrel domain superfamily, IPR039559 Altered inheritance of mitochondria protein 6, PI-PLC-like catalytic domain NA NA NA
XANPAGTX0501_006970-T1 48.17647 XANPAGTX0501_006970 mRNA Xp_GTX0501_11 354394 354648
NA hypothetical protein NA NA NA NA NA NA NA NA NA NA NA NA NA NA XANPAGTX0501_006970_T1_XANPAGTX0501_006970 NA NA NA NA NA NA NA NA
XANPAGTX0501_006972-T1 74.64706 XANPAGTX0501_006972 mRNA Xp_GTX0501_11 357523 357828
NA hypothetical protein NA NA NA NA NA NA NA NA NA NA NA NA NA NA XANPAGTX0501_006972_T1_XANPAGTX0501_006972 NA NA NA NA NA NA NA NA
XANPAGTX0501_008008-T1 58.88235 XANPAGTX0501_008008 mRNA Xp_GTX0501_14 138465 139439
NA hypothetical protein NA NA NA NA NA NA NA NA NA NA NA NA NA NA XANPAGTX0501_008008_T1_XANPAGTX0501_008008 NA NA NA NA NA NA NA NA
XANPAGTX0501_008856-T1 94.82353 XANPAGTX0501_008856 mRNA Xp_GTX0501_17 399345 400886
NA hypothetical protein NA NA NA NA IPR032675 Leucine-rich repeat domain superfamily NA NA NA NA NA NA NA Cluster_1 NA XANPAGTX0501_008856_T1_XANPAGTX0501_008856 NA NA ENOG503P83U GO_function: GO:0005515 - protein binding [Evidence IEA] IPR001810 F-box domain, IPR032675 Leucine-rich repeat domain superfamily NA NA NA
XANPAGTX0501_009040-T1 57.41176 XANPAGTX0501_009040 mRNA Xp_GTX0501_18 366441 366695
NA hypothetical protein NA NA NA NA NA NA NA NA NA NA NA NA NA NA XANPAGTX0501_009040_T1_XANPAGTX0501_009040 NA NA NA NA NA NA NA NA
XANPAGTX0501_009438-T1 963.88235 XANPAGTX0501_009438 mRNA Xp_GTX0501_21 196714 197521
NA hypothetical protein NA NA NA NA NA NA NA NA NA NA NA NA NA NA XANPAGTX0501_009438_T1_XANPAGTX0501_009438 NA NA NA NA NA NA NA NA
XANPAGTX0501_009555-T1 1952.88235 XANPAGTX0501_009555 mRNA Xp_GTX0501_22 191143 191546
NA hypothetical protein NA NA NA NA NA NA NA NA NA NA NA NA NA NA XANPAGTX0501_009555_T1_XANPAGTX0501_009555 NA NA NA NA NA NA NA NA
XANPAGTX0501_009582-T1 17.00000 XANPAGTX0501_009582 mRNA Xp_GTX0501_22 265438 265938
NA hypothetical protein NA NA NA NA NA NA NA NA NA NA NA NA NA NA XANPAGTX0501_009582_T1_XANPAGTX0501_009582 NA NA NA NA NA NA NA NA
XANPAGTX0501_009957-T1 69.00000 XANPAGTX0501_009957 mRNA Xp_GTX0501_27 408 679
NA hypothetical protein NA NA NA NA NA NA NA NA NA NA NA NA Cluster_1 NA XANPAGTX0501_009957_T1_XANPAGTX0501_009957 NA NA NA NA NA NA NA NA
XANPAGTX0501_010684-T1 45.11765 XANPAGTX0501_010684 mRNA Xp_GTX0501_46 20975 22156
NA hypothetical protein NA NA NA PF00067 IPR001128 Cytochrome P450;IPR002401 Cytochrome P450, E-class, group I;IPR017972 Cytochrome P450, conserved site;IPR036396 Cytochrome P450 superfamily ENOG503NWJC Q:(Q) Secondary metabolites biosynthesis, transport and catabolism GO_function: GO:0016705 - oxidoreductase activity, acting on paired donors, with incorporation or reduction of molecular oxygen [Evidence IEA];GO_function: GO:0020037 - heme binding [Evidence IEA];GO_function: GO:0005506 - iron ion binding [Evidence IEA];GO_process: GO:0055114 - oxidation-reduction process [Evidence IEA] NA NA NA NA NA NA XANPAGTX0501_010684_T1_XANPAGTX0501_010684 NA Q:(Q) Secondary metabolites biosynthesis, transport and catabolism ENOG503NWJC GO_function: GO:0005506 - iron ion binding [Evidence IEA], GO_function: GO:0016705 - oxidoreductase activity, acting on paired donors, with incorporation or reduction of molecular oxygen [Evidence IEA], GO_function: GO:0020037 - heme binding [Evidence IEA], GO_process: GO:0055114 - oxidation-reduction process [Evidence IEA] IPR001128 Cytochrome P450, IPR002401 Cytochrome P450, E-class, group I, IPR017972 Cytochrome P450, conserved site, IPR036396 Cytochrome P450 superfamily PF00067 NA NA
  • Most of these genes lack any annotation. There are three exceptions
  • Two are unclear:
    • A protein assigned to IPR039559 Altered inheritance of mitochondria protein 6, PI-PLC-like catalytic domain. Function is unclear
    • A protein assigned to IPR001128 Cytochrome P450
  • XANPAGTX0501_008856-T1 is interesting
    • Has a F-box domain and a LRR (leucine-rich repeat) domain
    • This review of fungal F-box proteins says that such proteins
  • act as scavengers in the cell, collecting “junk” proteins to deliver to a “waste processor,” called the SCF complex, to which they dock through their F-box domain. In the SCF complex, the junk proteins are marked with ubiquitin for “incineration” in the proteasome. F-box proteins do not act indiscriminately but recruit specific, often modified proteins to the SCF complex and in this way regulate the level of certain proteins in a cell. F-box proteins are found in all eukaryotes and display a large variety of functions. In fungi they are, for example, involved in control of the cell division cycle, glucose sensing, mitochondrial connectivity, and control of the circadian clock

    • XANPAGTX0501_008856-T1 is from a lichen-enriched orthogroup
    • Expression profile
    plot_bootstrap(so, 
                   target_id = "XANPAGTX0501_008856-T1", 
                   units = "est_counts", 
                   color_by = "condition")

    Identified 1 gene that was highly expressed in all culture samples but not at all in the lichens

    culture_unique<-counts_tab %>% filter(XBC2==0,XBA1==0,XSA2_2==0,XSC1==0,XSE2==0,XBE1==0,XTA2==0,XSC2==0,
      XSA2==0,XBA2==0,XBC1==0,XTA1==0,XSA1==0,XTC2==0,XTE2==0,XBE2==0,XMC2==0,
      S_21XB1>0,KS21XB1>0,S_21XB3>0,S_42XB1>0,S_42XB2>0,S_42XB3>0,
      KS48XB1>0,KS48XB2>0,KS48XB3>0,KS9XB1>0,KS9XB2>0,KS9XB3>0) %>% pivot_longer(-target_id,names_to="sample",values_to="counts") %>%
      filter(counts>0, sample !="MP_I",sample !="MP_II") %>% group_by(target_id) %>% summarise(avg_counts_culture=mean(as.numeric(counts))) %>%  
      left_join(funannot, by=c("target_id"="TranscriptID")) %>%
      select(-c(CDS.transcript,Translation,gDNA,mRNA))
    
    culture_unique %>%
      kable(format = "html", col.names = colnames(culture_unique)) %>%
      kable_styling() %>%
      kableExtra::scroll_box(width = "100%", height = "600px")
    target_id avg_counts_culture GeneID Feature Contig Start Stop Strand Name Product Alias.Synonyms EC_number BUSCO PFAM InterPro EggNog COG GO.Terms Secreted Membrane Protease CAZyme antiSMASH Notes ProtID CAZyme_new COG_new EggNog_new GO.Terms_new InterPro_new PFAM_new Protease_new Secreted_new KO
    XANPAGTX0501_008359-T2 967.2169 XANPAGTX0501_008359 mRNA Xp_GTX0501_15 271800 273051
    NA hypothetical protein NA NA NA PF03881 IPR011009 Protein kinase-like domain superfamily;IPR016477 Fructosamine/Ketosamine-3-kinase ENOG503P2EP G:(G) Carbohydrate transport and metabolism NA NA NA NA NA NA NA XANPAGTX0501_008359_T2_XANPAGTX0501_008359 NA G:(G) Carbohydrate transport and metabolism ENOG503NZHR, ENOG503P2EP NA IPR011009 Protein kinase-like domain superfamily, IPR016477 Fructosamine/Ketosamine-3-kinase PF03881 NA NA
  • According to InterPro, this protein is linked to protein repair
  • 4. Investigate selected genes

    4.1. Both gene models from the MAT-1-2-1 are upregulated in lichens

    plot_bootstrap(so, 
                   target_id = "XANPAGTX0501_002103-T1", 
                   units = "est_counts", 
                   color_by = "condition")
    plot_bootstrap(so, 
                   target_id = "XANPAGTX0501_002104-T1", 
                   units = "est_counts", 
                   color_by = "condition")

    4.2. G-alpha proteins

    • Of six proteins identified as G-alpha, none are in the DGE list. 4 have qval <0.05, but their b values are all between 0 and 1
    galpha<-funannot %>% filter(grepl("IPR001019",InterPro_new))
    
    sig %>% filter(target_id %in% galpha$TranscriptID)
    ## # A tibble: 5 × 12
    ##   target_id        pval     qval     b   se_b mean_obs var_obs tech_var sigma_sq
    ##   <chr>           <dbl>    <dbl> <dbl>  <dbl>    <dbl>   <dbl>    <dbl>    <dbl>
    ## 1 XANPAGTX050… 2.19e-14 3.18e-13 0.625 0.0819     8.08  0.130  0.000966   0.0323
    ## 2 XANPAGTX050… 6.74e- 9 4.96e- 8 0.582 0.100      7.59  0.153  0.000548   0.0702
    ## 3 XANPAGTX050… 4.12e- 4 1.26e- 3 0.491 0.139      7.97  0.192  0.00126    0.135 
    ## 4 XANPAGTX050… 2.57e- 3 6.48e- 3 0.233 0.0771     7.46  0.0288 0.000600   0.0152
    ## 5 XANPAGTX050… 2.58e- 2 4.90e- 2 0.210 0.0941     7.16  0.0712 0.000799   0.0615
    ## # ℹ 3 more variables: smooth_sigma_sq <dbl>, final_sigma_sq <dbl>, change <chr>

    4.4. Genes from lichen-enriched orthogroups

    • The list of lichen-enriched orthogroups is made by Neha, see ../02_long_read_assemblies/notebook/09_lichen_specific_genes.html
    • Heatmap showing all 1226 lichen-specific genes, regardles of DGE status
    funannot2<-read.delim2("../../02_long_read_assemblies/analysis_and_temp_files/09_ortho/lichen_enriched_ortho_in_xanpa.tsv",sep="\t")
    png("../analysis_and_temp_files/08_dge_culture_lichen/heatmap_lichen_enriched_all.png",res=300,width=2000,height=1500)
    plot_transcript_heatmap(so, transcripts=funannot2$TranscriptID[funannot2$lichen_ortho==T],
                            cluster_transcripts=T,show_rownames=F,
                            clustering_callback = callback)
    dev.off()
    ## pdf 
    ##   4
    knitr::include_graphics("../analysis_and_temp_files/08_dge_culture_lichen/heatmap_lichen_enriched_all.png") 

    • Of them, 361 are differentially expressed. This is 29.5% (compared to 15.6370139% across the genome)
    og_sig<-sig %>% filter(target_id %in% funannot2$TranscriptID[funannot2$lichen_ortho==T])
    og_sig %>% group_by(change) %>% summarise(n=n())
    ## # A tibble: 3 × 2
    ##   change        n
    ##   <chr>     <int>
    ## 1 culture      97
    ## 2 lichen      264
    ## 3 low_logFC   374
    • Heatmap with only DGE genes. Lichen samples are split based on substrates
    png("../analysis_and_temp_files/08_dge_culture_lichen/heatmap_lichen_enriched_dge.png",res=300,width=2000,height=1000)
    plot_transcript_heatmap(so, transcripts=og_sig$target_id[og_sig$change!="low_logFC"],
                            cluster_transcripts=T,show_rownames=F,
                            clustering_callback = callback)
    dev.off()
    ## pdf 
    ##   4
    knitr::include_graphics("../analysis_and_temp_files/08_dge_culture_lichen/heatmap_lichen_enriched_dge.png") 

    • Among the DGE lichen-spefic genes, there are 11 putative effectors (10 upregulated in lichen, 1 in culture). This includes a hydrophobin, a lectin and a thaumatin-like protein
    og_eff<-eff_list %>% filter(change %in% c("lichen","culture"),TranscriptID %in% og_sig$target_id)
    
    og_eff %>% 
      kable(format = "html", col.names = colnames(og_eff)) %>%
      kable_styling() %>%
      kableExtra::scroll_box(width = "100%", height = "600px")
    TranscriptID Product EC_number PFAM_new InterPro_new EggNog_new COG_new GO.Terms_new Protease_new CAZyme_new antiSMASH KO pval qval b se_b mean_obs var_obs tech_var sigma_sq smooth_sigma_sq final_sigma_sq change
    XANPAGTX0501_000656-T1 hypothetical protein IPR000772 Ricin B, lectin domain, IPR035992 Ricin B-like lectins 0e+00 0.0e+00 -4.357565 0.4814432 5.901784 6.3427509 0.0164185 1.6140872 0.0998084 1.6140872 lichen
    XANPAGTX0501_001856-T1 hypothetical protein IPR037176 Osmotin/thaumatin-like superfamily ENOG503P50I 0e+00 0.0e+00 -3.823871 0.4267541 5.375941 4.9088637 0.0242365 1.2568772 0.1485279 1.2568772 lichen
    XANPAGTX0501_002409-T1 hypothetical protein IPR001338 Hydrophobin GO_component: GO:0009277 - fungal-type cell wall [Evidence IEA], GO_function: GO:0005199 - structural constituent of cell wall [Evidence IEA] 0e+00 0.0e+00 -4.088717 0.2304978 9.598187 4.5603784 0.0003597 0.3733771 0.0994788 0.3733771 lichen
    XANPAGTX0501_005118-T1 hypothetical protein ENOG503PWV2, ENOG503PYSR Cluster_1 0e+00 0.0e+00 5.651733 0.6707321 2.530043 11.0765183 0.0109173 3.1537666 0.9389202 3.1537666 culture
    XANPAGTX0501_006142-T1 hypothetical protein 0e+00 0.0e+00 -2.552912 0.1836978 6.016577 1.8662663 0.0052380 0.2321399 0.0918787 0.2321399 lichen
    XANPAGTX0501_006194-T1 hypothetical protein 3.2.1.78 IPR036908 RlpA-like domain superfamily ENOG503P7ES O:(O) Posttranslational modification, protein turnover, chaperones 0e+00 0.0e+00 -1.466337 0.1233940 5.850548 0.6054853 0.0035018 0.0642183 0.1036059 0.1036059 lichen
    XANPAGTX0501_006245-T1 hypothetical protein 0e+00 0.0e+00 -3.929116 0.4012715 4.223801 4.9707307 0.0580619 1.0746222 0.3750018 1.0746222 lichen
    XANPAGTX0501_006967-T1 hypothetical protein 0e+00 0.0e+00 -1.686952 0.1629173 6.974185 0.8949974 0.0013260 0.1853835 0.0471902 0.1853835 lichen
    XANPAGTX0501_007280-T1 hypothetical protein 2e-07 1.2e-06 -1.317948 0.2534485 5.708208 0.8721158 0.0044510 0.4474169 0.1150951 0.4474169 lichen
    XANPAGTX0501_009189-T1 hypothetical protein 0e+00 0.0e+00 -4.014594 0.3923972 2.368268 4.8172641 0.0859581 0.7106675 0.9971806 0.9971806 lichen
    XANPAGTX0501_009311-T1 hypothetical protein PF05730 IPR008427 Extracellular membrane protein, CFEM domain 0e+00 0.0e+00 -1.019264 0.1582712 7.042583 0.4309233 0.0010038 0.1752083 0.0457542 0.1752083 lichen
    • Of 361 DGE lichen-specific genes, 168 have no functional annotation. This is 47%, compared to 0.3081806) across the whole genome

    4.5. Secondary metabolism clusters

    • How many genes are in the genome in total that are annotated as part of a SM gene cluster? 1239 in 59 clusters
      • NB: this is according to funannotate indexing. The number of regions displayed in the antiSMASH output is 49, since funannotate have split some of the ‘mixed’ clusters in to several)
      • Funannotate indexing here is misleading, because it didn’t handle correctly cases in which one gene is assigned to different clusters. I manually compiled a table instead
    funannot2 %>% group_by(antiSMASH,Contig) %>% summarize(n=n()) %>% filter(!is.na(antiSMASH))
    ## `summarise()` has grouped output by 'antiSMASH'. You can override using the
    ## `.groups` argument.
    ## # A tibble: 59 × 3
    ## # Groups:   antiSMASH [7]
    ##    antiSMASH Contig            n
    ##    <chr>     <chr>         <int>
    ##  1 Cluster_1 Xp_GTX0501_1     39
    ##  2 Cluster_1 Xp_GTX0501_10    20
    ##  3 Cluster_1 Xp_GTX0501_11    15
    ##  4 Cluster_1 Xp_GTX0501_12    12
    ##  5 Cluster_1 Xp_GTX0501_13    24
    ##  6 Cluster_1 Xp_GTX0501_14    11
    ##  7 Cluster_1 Xp_GTX0501_15    18
    ##  8 Cluster_1 Xp_GTX0501_17    24
    ##  9 Cluster_1 Xp_GTX0501_2     41
    ## 10 Cluster_1 Xp_GTX0501_24    44
    ## # ℹ 49 more rows
    • Save SM-related genes as a list
    sm_genes<-funannot2 %>% filter(!is.na(antiSMASH)) %>% mutate(cluster = paste0(Contig,"_",antiSMASH)) %>% select(-c(gDNA,mRNA,CDS.transcript,Translation,CAZyme,COG,EggNog,GO.Terms,InterPro,PFAM,Protease,Secreted))
    • 318 of them are differentially expressed. Weirdly enough, 30% of thouse are upregulated in culture!
    sm_sig<-sig %>% filter(target_id %in% sm_genes$TranscriptID)
    sm_sig %>% group_by(change) %>% summarise(n=n())
    ## # A tibble: 3 × 2
    ##   change        n
    ##   <chr>     <int>
    ## 1 culture      96
    ## 2 lichen      222
    ## 3 low_logFC   379
    • Heatmap of those
    png("../analysis_and_temp_files/08_dge_culture_lichen/heatmap_SM_dge.png",res=300,width=2000,height=1000)
    plot_transcript_heatmap(so, transcripts=sm_sig$target_id[sm_sig$change!="low_logFC"],
                            cluster_transcripts=T,show_rownames=F,
                            clustering_callback = callback)
    dev.off()
    ## pdf 
    ##   4
    knitr::include_graphics("../analysis_and_temp_files/08_dge_culture_lichen/heatmap_SM_dge.png") 

    • Break down by cluster
    library(splitstackshape)
    antismash<-read.delim2("../../02_long_read_assemblies/analysis_and_temp_files/06_annotate_lecanoro/GTX0501_pred/antismash/antismash_summary.txt")
    
    #get a list of all genes in each cluster
    antismash_gene_all<-antismash %>% mutate(start_num=as.numeric(gsub("^.*?_","",Start)),
                                             end_num=as.numeric(gsub("^.*?_","",End))) %>%
      mutate(gene_number=end_num-start_num+1) %>%
      expandRows("gene_number", drop = FALSE) %>%
      group_by(start_num,end_num) %>%
      mutate(gene_num = seq(start_num[1],end_num[1],by = 1)) %>%
      mutate(GeneID=ifelse(gene_num<1000,paste0("XANPAGTX0501_000",gene_num),
                                                ifelse(gene_num<10000,paste0("XANPAGTX0501_00",gene_num),
                                                paste0("XANPAGTX0501_0",gene_num)))) %>%
      mutate(Core_gene = ifelse(GeneID==Core_gene1|GeneID==Core_gene2,T,F)) %>% ungroup %>%
      select(GeneID,Cluster,Description,Core_gene)
    
    #add transcriptomic results
    summary_antismash_all<-antismash_gene_all %>% left_join(funannot2,relationship = "many-to-many") %>% 
      left_join(sig, by = c("TranscriptID"="target_id")) %>%
      group_by(Cluster,change) %>% summarize(n=n()) %>% 
      pivot_wider(names_from=change,values_from=n,values_fill=0) %>%
      left_join(antismash)
    ## Joining with `by = join_by(GeneID)`
    ## `summarise()` has grouped output by 'Cluster'. You can override using the
    ## `.groups` argument.
    ## Joining with `by = join_by(Cluster)`
    • Looking at just core biosynthetic genes, weirdly some of them are upregulated in culture
    summary_antismash_core<-antismash_gene_all %>% filter(Core_gene==T) %>%
      left_join(funannot2,relationship = "many-to-many") %>% 
      left_join(sig, by = c("TranscriptID"="target_id")) %>%
      mutate(change=ifelse(change %in% c("lichen","culture"),change,"Non-DGE")) %>%
      group_by(Cluster) %>% mutate(Core_gene_status=paste(change,collapse=",")) %>%
      select(Cluster,Core_gene_status) %>% distinct()
    ## Joining with `by = join_by(GeneID)`
    summary_antismash_core %>% group_by(Core_gene_status) %>%summarize(n=n())
    ## # A tibble: 6 × 2
    ##   Core_gene_status     n
    ##   <chr>            <int>
    ## 1 Non-DGE             35
    ## 2 Non-DGE,Non-DGE      2
    ## 3 culture              6
    ## 4 culture,culture      1
    ## 5 lichen              14
    ## 6 lichen,Non-DGE       1
    • In most clusters, the mojority is non DGE, as expected. Otherwise, the results are unexpectedly mixed.
      • 6 clusters are definitiely upregulated in lichen (core gene is upregulated in lichen, and no genes are upregulated in culture)
      • One is clearly upregulated in culture! Xp_GTX0501_8_Cluster_1 (unknown NRPS)
      • The rest is mixed
    summary_antismash_all<-summary_antismash_all %>% left_join(summary_antismash_core)
    ## Joining with `by = join_by(Cluster)`
    summary_antismash_all %>%
      kable(format = "html", col.names = colnames(summary_antismash_all)) %>%
      kable_styling() %>%
      kableExtra::scroll_box(width = "100%", height = "600px")
    Cluster culture lichen low_logFC NA Description Start End Core_gene1 Core_gene2 KnownClusterBlast_MIBIG KnownClusterBlast_description KnownClusterBlast_Similarity KnownClusterBlast_includes_core Core_gene_status
    Xp_GTX0501_10_Cluster_1 2 12 4 2 NRPS-like XANPAGTX0501_006291 XANPAGTX0501_006310 XANPAGTX0501_006297 NA Non-DGE
    Xp_GTX0501_10_Cluster_2 2 9 6 8 T1PKS XANPAGTX0501_006308 XANPAGTX0501_006332 XANPAGTX0501_006319 BGC0002215 HEx-pks23 polyketide<ca> 0.22 1 lichen
    Xp_GTX0501_10_Cluster_3 0 2 14 11 NRPS-like XANPAGTX0501_006344 XANPAGTX0501_006369 XANPAGTX0501_006358 NA Non-DGE
    Xp_GTX0501_10_Cluster_4 2 1 14 9 T3PKS XANPAGTX0501_006734 XANPAGTX0501_006757 XANPAGTX0501_006744 NA Non-DGE
    Xp_GTX0501_11_Cluster_1 3 2 2 7 NRPS XANPAGTX0501_006840 XANPAGTX0501_006853 XANPAGTX0501_006845 BGC0002235 ACE1 0.11 1 lichen
    Xp_GTX0501_12_Cluster_1 1 4 2 5 T1PKS XANPAGTX0501_007263 XANPAGTX0501_007274 XANPAGTX0501_007266 BGC0000146 solanapyrone D 0.33 1 Non-DGE
    Xp_GTX0501_12_Cluster_2 0 9 12 11 T1PKS XANPAGTX0501_007334 XANPAGTX0501_007363 XANPAGTX0501_007346 NA Non-DGE,Non-DGE
    Xp_GTX0501_12_Cluster_3 3 1 4 5 terpene XANPAGTX0501_007388 XANPAGTX0501_007400 XANPAGTX0501_007395 NA Non-DGE
    Xp_GTX0501_13_Cluster_1 1 5 2 14 NRPS-like XANPAGTX0501_007869 XANPAGTX0501_007889 XANPAGTX0501_007880 NA Non-DGE
    Xp_GTX0501_14_Cluster_1 1 2 4 3 terpene XANPAGTX0501_007984 XANPAGTX0501_007993 XANPAGTX0501_007988 NA Non-DGE
    Xp_GTX0501_15_Cluster_1 1 7 8 3 NRPS-like XANPAGTX0501_008267 XANPAGTX0501_008284 XANPAGTX0501_008272 NA lichen
    Xp_GTX0501_17_Cluster_1 0 7 8 7 T1PKS XANPAGTX0501_008845 XANPAGTX0501_008866 XANPAGTX0501_008852 BGC0000156 TAN-1612 biosynthetic gene cluster fromAspergillus niger 0.6 1 lichen
    Xp_GTX0501_1_Cluster_1 5 6 12 15 fungal-RiPP-like XANPAGTX0501_000667 XANPAGTX0501_000702 XANPAGTX0501_000685 NA lichen
    Xp_GTX0501_1_Cluster_2 1 7 4 14 T1PKS XANPAGTX0501_000912 XANPAGTX0501_000937 XANPAGTX0501_000926 BGC0000146 solanapyrone D 0.5 0 Non-DGE
    Xp_GTX0501_24_Cluster_1 10 10 7 17 fungal-RiPP-like XANPAGTX0501_009693 XANPAGTX0501_009736 XANPAGTX0501_009715 XANPAGTX0501_009716 NA culture,culture
    Xp_GTX0501_25_Cluster_1 0 2 4 11 terpene XANPAGTX0501_009858 XANPAGTX0501_009874 XANPAGTX0501_009866 NA Non-DGE
    Xp_GTX0501_27_Cluster_1 1 2 12 7 NRPS-like XANPAGTX0501_009956 XANPAGTX0501_009977 XANPAGTX0501_009964 NA Non-DGE
    Xp_GTX0501_2_Cluster_1 2 6 15 16 fungal-RiPP-like XANPAGTX0501_001046 XANPAGTX0501_001083 XANPAGTX0501_001064 NA lichen
    Xp_GTX0501_2_Cluster_2 1 3 10 14 NRPS XANPAGTX0501_001225 XANPAGTX0501_001250 XANPAGTX0501_001239 NA Non-DGE
    Xp_GTX0501_2_Cluster_3 1 1 8 14 NRPS-like XANPAGTX0501_001291 XANPAGTX0501_001314 XANPAGTX0501_001305 NA Non-DGE
    Xp_GTX0501_2_Cluster_4 0 0 7 6 terpene XANPAGTX0501_001616 XANPAGTX0501_001628 XANPAGTX0501_001622 NA Non-DGE
    Xp_GTX0501_2_Cluster_5 0 2 3 18 T1PKS XANPAGTX0501_001669 XANPAGTX0501_001691 XANPAGTX0501_001679 NA Non-DGE
    Xp_GTX0501_2_Cluster_6 0 0 5 19 NRP-metallophore XANPAGTX0501_001682 XANPAGTX0501_001705 XANPAGTX0501_001696 XANPAGTX0501_001694 NA Non-DGE,Non-DGE
    Xp_GTX0501_2_Cluster_7 0 0 5 16 NRPS XANPAGTX0501_001682 XANPAGTX0501_001702 XANPAGTX0501_001694 NA Non-DGE
    Xp_GTX0501_35_Cluster_1 6 3 3 9 NRPS-like XANPAGTX0501_010404 XANPAGTX0501_010423 XANPAGTX0501_010409 NA culture
    Xp_GTX0501_35_Cluster_2 2 8 4 9 T1PKS XANPAGTX0501_010418 XANPAGTX0501_010440 XANPAGTX0501_010430 NA lichen
    Xp_GTX0501_36_Cluster_1 2 4 9 11 T1PKS XANPAGTX0501_010449 XANPAGTX0501_010474 XANPAGTX0501_010464 BGC0001242 oxyjavanicin 0.25 1 Non-DGE
    Xp_GTX0501_3_Cluster_1 10 8 0 3 NRPS XANPAGTX0501_001961 XANPAGTX0501_001981 XANPAGTX0501_001972 BGC0001741 phyllostictine A/phyllostictine B 0.5 1 culture
    Xp_GTX0501_3_Cluster_2 10 6 0 4 T1PKS XANPAGTX0501_001963 XANPAGTX0501_001982 XANPAGTX0501_001973 BGC0001741 phyllostictine A/phyllostictine B 0.5 1 culture
    Xp_GTX0501_3_Cluster_3 0 7 1 6 terpene XANPAGTX0501_002180 XANPAGTX0501_002193 XANPAGTX0501_002188 NA Non-DGE
    Xp_GTX0501_41_Cluster_1 0 1 9 3 NRPS-like XANPAGTX0501_010609 XANPAGTX0501_010621 XANPAGTX0501_010621 NA lichen
    Xp_GTX0501_43_Cluster_1 0 4 4 16 T1PKS XANPAGTX0501_010622 XANPAGTX0501_010644 XANPAGTX0501_010632 NA Non-DGE
    Xp_GTX0501_48_Cluster_1 3 6 2 8 NRPS-like XANPAGTX0501_010686 XANPAGTX0501_010704 XANPAGTX0501_010692 NA culture
    Xp_GTX0501_4_Cluster_1 1 0 3 9 terpene XANPAGTX0501_002454 XANPAGTX0501_002466 XANPAGTX0501_002460 NA Non-DGE
    Xp_GTX0501_4_Cluster_2 0 6 15 12 NRPS-like XANPAGTX0501_002475 XANPAGTX0501_002501 XANPAGTX0501_002488 NA lichen
    Xp_GTX0501_4_Cluster_3 0 1 6 7 terpene XANPAGTX0501_002520 XANPAGTX0501_002531 XANPAGTX0501_002527 BGC0001839 squalestatin S1 0.4 1 Non-DGE
    Xp_GTX0501_4_Cluster_4 0 3 10 13 NRPS XANPAGTX0501_002525 XANPAGTX0501_002549 XANPAGTX0501_002535 BGC0002164 peramine 1 1 lichen
    Xp_GTX0501_4_Cluster_5 0 3 5 17 NRPS XANPAGTX0501_003108 XANPAGTX0501_003131 XANPAGTX0501_003119 NA lichen
    Xp_GTX0501_50_Cluster_1 0 2 1 3 T1PKS XANPAGTX0501_010705 XANPAGTX0501_010710 XANPAGTX0501_010709 BGC0000146 solanapyrone D 0.33 1 Non-DGE
    Xp_GTX0501_52_Cluster_1 2 3 1 2 NRPS XANPAGTX0501_010711 XANPAGTX0501_010717 XANPAGTX0501_010715 NA Non-DGE
    Xp_GTX0501_5_Cluster_1 4 5 2 9 T1PKS XANPAGTX0501_003170 XANPAGTX0501_003189 XANPAGTX0501_003176 BGC0002515 solanapyrone A 0.4 1 culture
    Xp_GTX0501_5_Cluster_2 7 5 7 7 T1PKS XANPAGTX0501_003179 XANPAGTX0501_003204 XANPAGTX0501_003194 NA Non-DGE
    Xp_GTX0501_5_Cluster_3 0 3 7 16 NRPS,T1PKS XANPAGTX0501_003287 XANPAGTX0501_003311 XANPAGTX0501_003300 BGC0001067 fumagillin/_-trans-bergamotene/fumagillol 0.2 0 Non-DGE
    Xp_GTX0501_5_Cluster_4 2 2 11 13 NRPS-like XANPAGTX0501_003361 XANPAGTX0501_003388 XANPAGTX0501_003374 NA lichen
    Xp_GTX0501_6_Cluster_1 0 3 10 10 T1PKS XANPAGTX0501_004370 XANPAGTX0501_004390 XANPAGTX0501_004380 NA Non-DGE
    Xp_GTX0501_6_Cluster_2 0 4 7 13 T1PKS XANPAGTX0501_004397 XANPAGTX0501_004420 XANPAGTX0501_004410 NA Non-DGE
    Xp_GTX0501_7_Cluster_1 0 4 10 12 T1PKS XANPAGTX0501_004611 XANPAGTX0501_004634 XANPAGTX0501_004622 BGC0001258 1,3,6,8-tetrahydroxynaphthalene 1 1 Non-DGE
    Xp_GTX0501_7_Cluster_2 0 2 7 3 terpene XANPAGTX0501_004765 XANPAGTX0501_004776 XANPAGTX0501_004770 NA lichen
    Xp_GTX0501_7_Cluster_3 1 9 9 13 T1PKS XANPAGTX0501_005055 XANPAGTX0501_005086 XANPAGTX0501_005070 NA Non-DGE
    Xp_GTX0501_7_Cluster_4 1 7 7 10 NRPS-like XANPAGTX0501_005071 XANPAGTX0501_005095 XANPAGTX0501_005087 NA Non-DGE
    Xp_GTX0501_7_Cluster_5 1 8 7 8 T1PKS XANPAGTX0501_005075 XANPAGTX0501_005098 XANPAGTX0501_005090 NA Non-DGE
    Xp_GTX0501_8_Cluster_1 9 0 4 11 NRPS XANPAGTX0501_005105 XANPAGTX0501_005128 XANPAGTX0501_005116 NA culture
    Xp_GTX0501_8_Cluster_2 1 6 9 13 T1PKS XANPAGTX0501_005240 XANPAGTX0501_005265 XANPAGTX0501_005252 NA lichen
    Xp_GTX0501_8_Cluster_3 1 5 10 13 T1PKS XANPAGTX0501_005280 XANPAGTX0501_005306 XANPAGTX0501_005291 BGC0000046 depudecin 0.33 0 Non-DGE
    Xp_GTX0501_8_Cluster_4 5 4 11 15 fungal-RiPP-like XANPAGTX0501_005329 XANPAGTX0501_005361 XANPAGTX0501_005349 NA Non-DGE
    Xp_GTX0501_8_Cluster_5 1 3 4 5 terpene XANPAGTX0501_005377 XANPAGTX0501_005388 XANPAGTX0501_005382 NA lichen,Non-DGE
    Xp_GTX0501_9_Cluster_1 0 4 3 14 T1PKS XANPAGTX0501_005732 XANPAGTX0501_005752 XANPAGTX0501_005742 BGC0001258 1,3,6,8-tetrahydroxynaphthalene 1 1 Non-DGE
    Xp_GTX0501_9_Cluster_2 2 0 5 16 NRPS XANPAGTX0501_005742 XANPAGTX0501_005764 XANPAGTX0501_005751 NA Non-DGE
    Xp_GTX0501_9_Cluster_3 2 3 12 11 T1PKS XANPAGTX0501_005922 XANPAGTX0501_005948 XANPAGTX0501_005935 BGC0002194 epipyrone A 0.75 1 Non-DGE
    • The list of definitiely upregulated in lichen
      • 2 of them have mathces to known clusters and are discussed below
    summary_antismash_all %>% filter(culture==0,lichen>0,grepl("lichen",Core_gene_status))  %>% 
      kable(format = "html", col.names = colnames(summary_antismash_all)) %>%
      kable_styling() %>%
      kableExtra::scroll_box(width = "100%", height = "600px")
    Cluster culture lichen low_logFC NA Description Start End Core_gene1 Core_gene2 KnownClusterBlast_MIBIG KnownClusterBlast_description KnownClusterBlast_Similarity KnownClusterBlast_includes_core Core_gene_status
    Xp_GTX0501_17_Cluster_1 0 7 8 7 T1PKS XANPAGTX0501_008845 XANPAGTX0501_008866 XANPAGTX0501_008852 BGC0000156 TAN-1612 biosynthetic gene cluster fromAspergillus niger 0.6 1 lichen
    Xp_GTX0501_41_Cluster_1 0 1 9 3 NRPS-like XANPAGTX0501_010609 XANPAGTX0501_010621 XANPAGTX0501_010621 NA lichen
    Xp_GTX0501_4_Cluster_2 0 6 15 12 NRPS-like XANPAGTX0501_002475 XANPAGTX0501_002501 XANPAGTX0501_002488 NA lichen
    Xp_GTX0501_4_Cluster_4 0 3 10 13 NRPS XANPAGTX0501_002525 XANPAGTX0501_002549 XANPAGTX0501_002535 BGC0002164 peramine 1 1 lichen
    Xp_GTX0501_4_Cluster_5 0 3 5 17 NRPS XANPAGTX0501_003108 XANPAGTX0501_003131 XANPAGTX0501_003119 NA lichen
    Xp_GTX0501_7_Cluster_2 0 2 7 3 terpene XANPAGTX0501_004765 XANPAGTX0501_004776 XANPAGTX0501_004770 NA lichen

    Parietin cluster

    • Cross-referencing with Theo Llewellyn’s paper. In his analysis of many lecanoromycete genomes, he identified putative anthraquinone BGC as those similar to 4 MIBiG clusters:
      • the anthraquinones emodin in Escovopsis weberi
      • asperthecin in Aspergillus nidulans FGSC A4
      • endocrocin/clavorubin in Claviceps purpurea
      • the structurally related alternariol in Aspergillus nidulans FGSC A4 and TAN-1612 Aspergillus niger
    • Our genome has one such cluster: Xp_GTX0501_17_Cluster_1 is a TPK1S cluster similar to TAN-1612
    • This cluster is one of the 6 definitely upregulated in lichen
    • This matches Theo’s results, since he also shown one such cluster in the X. parietina from JGI
    • Also the structure of the gene cluster matches what Theo describes as a conserved 4-gene structure unique to Teloschistales:
      • FUN_008850: a transporter with an ABC-casset (IPR003439 ABC transporter-like, ATP-binding domain); - strand
      • FUN_008851: metallo-beta-lactamase-type thioesterase (IPR001279 Metallo-beta-lactamase)
      • FUN_008852: PKS-synthase with SAT-KS-AT-PT-ACP domains (starter unit acyltransferase domain - ketoacyl synthase - acyltransferase - product template - acyl carrier protein)
      • FUN_008853: a dehydratase with a single EthylD domain (IPR009799 EthD domain)
    • None of other PKS clusters had this structure
    • All four of the genes are upregulated in lichen
    sig %>% filter(target_id %in% c("XANPAGTX0501_008850-T1","XANPAGTX0501_008851-T1","XANPAGTX0501_008852-T1","XANPAGTX0501_008853-T1")) %>% select(target_id,qval,b,change)
    ## # A tibble: 4 × 4
    ##   target_id                  qval     b change
    ##   <chr>                     <dbl> <dbl> <chr> 
    ## 1 XANPAGTX0501_008850-T1 6.06e-22 -1.70 lichen
    ## 2 XANPAGTX0501_008851-T1 5.87e-21 -1.93 lichen
    ## 3 XANPAGTX0501_008852-T1 1.87e-22 -2.08 lichen
    ## 4 XANPAGTX0501_008853-T1 3.26e-69 -4.00 lichen
    • Boxplots show clearly that it is upregulated in lichen
    plot_bootstrap(so, 
                   target_id = "XANPAGTX0501_008850-T1", 
                   units = "est_counts", 
                   color_by = "condition")
    plot_bootstrap(so, 
                   target_id = "XANPAGTX0501_008851-T1", 
                   units = "est_counts", 
                   color_by = "condition")
    plot_bootstrap(so, 
                   target_id = "XANPAGTX0501_008852-T1", 
                   units = "est_counts", 
                   color_by = "condition")
    plot_bootstrap(so, 
                   target_id = "XANPAGTX0501_008853-T1", 
                   units = "est_counts", 
                   color_by = "condition")

    Peramine-like cluster

    • Main biosynthetic gene from Xp_GTX0501_4_Cluster_4 (XANPAGTX0501_002535) is 65% identical to peramine synthase (evalue=0.0). However, it’s much shorter than the reference (1425 aa vs 2773 aa in the reference)
    • This gene is from a lichen-enriched orthogroup
    • Peramine is an anti-insect alcaloid from Epichloe, which is a mutualistic endophyte. Thought to be offer the host plant protection from herbivory
    • Some lichen compaunds are apparently alkaloids, including rhizocarpic acid, but none of these are known from Xanthoria. There is one report showing trace amounts of alkaloids in Xanthoria
    • This paper describes similar cluster from Cladonia grayi
    • They also report similar cluster from Xanthoria parietina, but deem it non-functional or producing something other than peramine. The reason for that is two missing domains: A1 (first of the adenylylation domains; peramine synthase from Cladonia has two of them, while Xanthoria only one) and R (reductase domain). In total, their annotation has 3 domains:
      • C (condensation domain)
      • A (adenylylation domain)
      • M (methylation domain)
    • According to antiSMASH, our putative peramine synthase has four domains. First three are the same as in the paper, but the last (CP or PCP) is an addition
      • C (condensation domain)
      • A (adenylylation domain)
      • nMT (nitrogen methyltransferase)
      • CP (peptidyl-carrier protein domain)
    • I ran antiSMASH on Cladonia grayi to compare. There, putative peramine synthase has 6 domains: A, CP, C, nMT, CP, NAD. The length of the Cladonia protein is close to that of the reference
    • Conclusion: this cluster is likely functional, but producing something other than peramine

    4.6. Kinases

    • In total, the genome has 144 proteins with a kinase domain. Of them, 24 are from lichen-enriched orthogroups
    funannot2 %>% filter(grepl("IPR000719",InterPro_new)) %>% group_by(lichen_ortho) %>% summarize(n=n())
    ## # A tibble: 2 × 2
    ##   lichen_ortho     n
    ##   <lgl>        <int>
    ## 1 FALSE          120
    ## 2 TRUE            24
    • Lichen and culture have similar number of kinases upregulated
    funannot2 %>% left_join(sig,by=c("TranscriptID"="target_id")) %>%
      filter(grepl("IPR000719",InterPro_new)) %>% group_by(change) %>% summarize(n=n())
    ## # A tibble: 4 × 2
    ##   change        n
    ##   <chr>     <int>
    ## 1 culture      12
    ## 2 lichen       13
    ## 3 low_logFC    55
    ## 4 <NA>         64

    4.7. Transporters

    Sugar and polyol transporters

    • In total, the genome has 38 proteins from the Major facilitator, sugar transporter-like family. This is in line with what Philipp identified in other lecanoromycetes. Of them, 5 are from lichen-enriched orthogroups
    funannot2 %>% filter(grepl("IPR005828",InterPro_new)) %>% group_by(lichen_ortho) %>% summarize(n=n())
    ## # A tibble: 2 × 2
    ##   lichen_ortho     n
    ##   <lgl>        <int>
    ## 1 FALSE           33
    ## 2 TRUE             5
    • List all lichen-specific
    funannot2 %>% filter(grepl("IPR005828",InterPro_new),lichen_ortho==T) %>% select(TranscriptID,InterPro_new)
    ##             TranscriptID
    ## 1 XANPAGTX0501_001653-T1
    ## 2 XANPAGTX0501_005571-T1
    ## 3 XANPAGTX0501_005572-T1
    ## 4 XANPAGTX0501_008617-T1
    ## 5 XANPAGTX0501_010000-T1
    ##                                                                                                                                                                                                                    InterPro_new
    ## 1 IPR003663 Sugar/inositol transporter, IPR005828 Major facilitator, sugar transporter-like, IPR005829 Sugar transporter, conserved site, IPR020846 Major facilitator superfamily domain, IPR036259 MFS transporter superfamily
    ## 2                                              IPR003663 Sugar/inositol transporter, IPR005828 Major facilitator, sugar transporter-like, IPR020846 Major facilitator superfamily domain, IPR036259 MFS transporter superfamily
    ## 3                                                                                    IPR005828 Major facilitator, sugar transporter-like, IPR020846 Major facilitator superfamily domain, IPR036259 MFS transporter superfamily
    ## 4 IPR003663 Sugar/inositol transporter, IPR005828 Major facilitator, sugar transporter-like, IPR005829 Sugar transporter, conserved site, IPR020846 Major facilitator superfamily domain, IPR036259 MFS transporter superfamily
    ## 5                                              IPR003663 Sugar/inositol transporter, IPR005828 Major facilitator, sugar transporter-like, IPR020846 Major facilitator superfamily domain, IPR036259 MFS transporter superfamily
    • Six of them upregulated in lichen, 2 in culture
    funannot2 %>% left_join(sig,by=c("TranscriptID"="target_id")) %>%
      filter(grepl("IPR005828",InterPro_new)) %>% group_by(change) %>% summarize(n=n())
    ## # A tibble: 4 × 2
    ##   change        n
    ##   <chr>     <int>
    ## 1 culture       2
    ## 2 lichen        6
    ## 3 low_logFC    16
    ## 4 <NA>         14
    • Of the 5 lichen-enriched, 3 were upregulated in lichens
    funannot2 %>% left_join(sig,by=c("TranscriptID"="target_id")) %>%
      filter(grepl("IPR005828",InterPro_new),lichen_ortho==T) %>% group_by(change) %>% summarize(n=n())
    ## # A tibble: 2 × 2
    ##   change        n
    ##   <chr>     <int>
    ## 1 lichen        3
    ## 2 low_logFC     2
    • Remaining 2 were more present in lichen, but had b-valie between -1 and 0
    funannot2 %>% left_join(sig,by=c("TranscriptID"="target_id")) %>%
      filter(grepl("IPR005828",InterPro_new),lichen_ortho==T) %>% select(TranscriptID,change,b,qval)
    ##             TranscriptID    change          b         qval
    ## 1 XANPAGTX0501_001653-T1    lichen -2.0408199 3.742386e-12
    ## 2 XANPAGTX0501_005571-T1    lichen -1.9797831 6.700776e-05
    ## 3 XANPAGTX0501_005572-T1    lichen -1.9025473 1.986579e-05
    ## 4 XANPAGTX0501_008617-T1 low_logFC -0.5728169 3.266494e-07
    ## 5 XANPAGTX0501_010000-T1 low_logFC -0.6135197 2.741270e-04
    • Identitified putative polyol transporter with blast.
      • Used 4 proteins as queries, following Philipp: “AAX98668.1; from Ambrosiozyma monospora, CAR65543.1, CAG86001.1; from Debaryomyces hansenii, NP_010036.1; from Saccharomyces cerevisiae”, saved as analysis_and_temp_files/08_dge_culture_lichen/blastp_polyol_transporters.txt
    sbatch --mem=5G -c 1 --wrap="source package d6092385-3a81-49d9-b044-8ffb85d0c446; blastp -query analysis_and_temp_files/08_dge_culture_lichen/polyol_transporters_genbank.fa -subject ../02_long_read_assemblies/analysis_and_temp_files/06_annotate_lecanoro/GTX0501_pred/annotate_results/Xanthoria_parietina_GTX0501.proteins.fa -outfmt 6 -out analysis_and_temp_files/08_dge_culture_lichen/blastp_polyol_transporters.txt -evalue 1e-100"
    • Returned 9 proteins (from 8 genes) with matches with e-value<1e-100
    polyol_blast<-read.delim2("../analysis_and_temp_files/08_dge_culture_lichen/blastp_polyol_transporters.txt",header=F)
    polyol_blast<-polyol_blast %>% select(V1,V2,V11,V12)
    colnames(polyol_blast)<-c("query","subject","e-value","score")
    polyol_blast
    ##         query                subject   e-value score
    ## 1  AAX98668.1 XANPAGTX0501_010000-T1 1.49e-179   516
    ## 2  AAX98668.1 XANPAGTX0501_001653-T1 5.32e-176   508
    ## 3  AAX98668.1 XANPAGTX0501_008617-T1 2.52e-114   350
    ## 4  CAR65543.1 XANPAGTX0501_005996-T1       0.0   602
    ## 5  CAR65543.1 XANPAGTX0501_007564-T1 9.63e-178   519
    ## 6  CAR65543.1 XANPAGTX0501_009339-T1 5.39e-151   449
    ## 7  CAR65543.1 XANPAGTX0501_009339-T2 2.57e-149   442
    ## 8  CAR65543.1 XANPAGTX0501_000906-T1 4.21e-145   434
    ## 9  CAR65543.1 XANPAGTX0501_001701-T1 7.68e-133   402
    ## 10 CAG86001.1 XANPAGTX0501_005996-T1       0.0   591
    ## 11 CAG86001.1 XANPAGTX0501_007564-T1 2.80e-178   520
    ## 12 CAG86001.1 XANPAGTX0501_009339-T1 1.51e-152   453
    ## 13 CAG86001.1 XANPAGTX0501_009339-T2 2.52e-150   445
    ## 14 CAG86001.1 XANPAGTX0501_000906-T1 3.81e-146   436
    ## 15 CAG86001.1 XANPAGTX0501_001701-T1 1.48e-132   401
    • Of them 3 were from lichen-enriched orthogroups and 1 was DGE and upregulated in lichen (and also from a lichen-enriched orthogroup)
    funannot2 %>% left_join(sig,by=c("TranscriptID"="target_id")) %>%
      filter(TranscriptID %in% polyol_blast$subject) %>% select(TranscriptID,change,b,qval,lichen_ortho)
    ##             TranscriptID    change          b         qval lichen_ortho
    ## 1 XANPAGTX0501_000906-T1      <NA>         NA           NA        FALSE
    ## 2 XANPAGTX0501_001653-T1    lichen -2.0408199 3.742386e-12         TRUE
    ## 3 XANPAGTX0501_001701-T1 low_logFC  0.6237851 2.869476e-06        FALSE
    ## 4 XANPAGTX0501_005996-T1 low_logFC  0.4003563 2.088997e-06        FALSE
    ## 5 XANPAGTX0501_007564-T1 low_logFC  0.2966283 4.581743e-02        FALSE
    ## 6 XANPAGTX0501_008617-T1 low_logFC -0.5728169 3.266494e-07         TRUE
    ## 7 XANPAGTX0501_009339-T1      <NA>         NA           NA        FALSE
    ## 8 XANPAGTX0501_009339-T2      <NA>         NA           NA        FALSE
    ## 9 XANPAGTX0501_010000-T1 low_logFC -0.6135197 2.741270e-04         TRUE
    plot_bootstrap(so, 
                   target_id = "XANPAGTX0501_001653-T1", 
                   units = "est_counts", 
                   color_by = "condition")

    Ammonium transporters

    • In total, the genome has 2 proteins from the Ammonium transporter, neither is from lichen-enriched orthogroups
    funannot2 %>% filter(grepl("IPR001905",InterPro_new)) %>% group_by(lichen_ortho) %>% summarize(n=n())
    ## # A tibble: 1 × 2
    ##   lichen_ortho     n
    ##   <lgl>        <int>
    ## 1 FALSE            2
    • One of them is upregulated in lichen
    funannot2 %>% left_join(sig,by=c("TranscriptID"="target_id")) %>%
      filter(grepl("IPR001905",InterPro_new)) %>% select(TranscriptID,change,b,qval)
    ##             TranscriptID change         b         qval
    ## 1 XANPAGTX0501_004972-T1 lichen -2.248389 3.681322e-56
    ## 2 XANPAGTX0501_009385-T1   <NA>        NA           NA

    All transporters

    • 41 upregulated in lichen, 12 of them (29%) from lichen-enriched orthogroups
    transport_lic<-funannot2 %>% left_join(sig,by=c("TranscriptID"="target_id")) %>%
      filter(grepl("transport",InterPro_new),change=="lichen") %>% select(TranscriptID,InterPro_new,lichen_ortho)
    transport_lic %>% 
      kable(format = "html", col.names = c("TranscriptID","InterPro_new","lichen_ortho")) %>%
      kable_styling() %>%
      kableExtra::scroll_box(width = "100%", height = "600px")
    TranscriptID InterPro_new lichen_ortho
    XANPAGTX0501_000142-T1 IPR011701 Major facilitator superfamily, IPR036259 MFS transporter superfamily FALSE
    XANPAGTX0501_000342-T1 IPR002293 Amino acid/polyamine transporter I FALSE
    XANPAGTX0501_000342-T2 IPR002293 Amino acid/polyamine transporter I FALSE
    XANPAGTX0501_000684-T1 IPR036259 MFS transporter superfamily TRUE
    XANPAGTX0501_000704-T1 IPR011701 Major facilitator superfamily, IPR020846 Major facilitator superfamily domain, IPR036259 MFS transporter superfamily FALSE
    XANPAGTX0501_001082-T1 IPR004853 Sugar phosphate transporter domain FALSE
    XANPAGTX0501_001448-T1 IPR011701 Major facilitator superfamily, IPR036259 MFS transporter superfamily FALSE
    XANPAGTX0501_001653-T1 IPR003663 Sugar/inositol transporter, IPR005828 Major facilitator, sugar transporter-like, IPR005829 Sugar transporter, conserved site, IPR020846 Major facilitator superfamily domain, IPR036259 MFS transporter superfamily TRUE
    XANPAGTX0501_002486-T1 IPR011701 Major facilitator superfamily, IPR020846 Major facilitator superfamily domain, IPR036259 MFS transporter superfamily TRUE
    XANPAGTX0501_003065-T3 IPR005178 Organic solute transporter subunit alpha/Transmembrane protein 184 FALSE
    XANPAGTX0501_003337-T2 IPR004738 Phosphate permease, IPR005828 Major facilitator, sugar transporter-like, IPR005829 Sugar transporter, conserved site, IPR020846 Major facilitator superfamily domain, IPR036259 MFS transporter superfamily FALSE
    XANPAGTX0501_004383-T1 IPR011701 Major facilitator superfamily, IPR020846 Major facilitator superfamily domain, IPR036259 MFS transporter superfamily TRUE
    XANPAGTX0501_004428-T1 IPR004695 Transporter protein SLAC1/Mae1/ Ssu1/TehA, IPR030185 Malic acid transport protein, IPR038665 Voltage-dependent anion channel superfamily FALSE
    XANPAGTX0501_004624-T1 IPR004737 Nitrate transporter NarK/NarU-like, IPR011701 Major facilitator superfamily, IPR020846 Major facilitator superfamily domain, IPR036259 MFS transporter superfamily FALSE
    XANPAGTX0501_004972-T1 IPR001905 Ammonium transporter, IPR018047 Ammonium transporter, conserved site, IPR024041 Ammonium transporter AmtB-like domain, IPR029020 Ammonium/urea transporter FALSE
    XANPAGTX0501_004996-T1 IPR002293 Amino acid/polyamine transporter I FALSE
    XANPAGTX0501_005027-T1 IPR003370 Chromate transporter, IPR014047 Chromate transporter, long chain FALSE
    XANPAGTX0501_005378-T1 IPR002293 Amino acid/polyamine transporter I FALSE
    XANPAGTX0501_005489-T1 IPR036259 MFS transporter superfamily TRUE
    XANPAGTX0501_005571-T1 IPR003663 Sugar/inositol transporter, IPR005828 Major facilitator, sugar transporter-like, IPR020846 Major facilitator superfamily domain, IPR036259 MFS transporter superfamily TRUE
    XANPAGTX0501_005572-T1 IPR005828 Major facilitator, sugar transporter-like, IPR020846 Major facilitator superfamily domain, IPR036259 MFS transporter superfamily TRUE
    XANPAGTX0501_005708-T1 IPR036259 MFS transporter superfamily TRUE
    XANPAGTX0501_006293-T1 IPR001958 Tetracycline resistance protein TetA/multidrug resistance protein MdtG, IPR011701 Major facilitator superfamily, IPR020846 Major facilitator superfamily domain, IPR036259 MFS transporter superfamily TRUE
    XANPAGTX0501_006324-T1 IPR001204 Phosphate transporter FALSE
    XANPAGTX0501_006367-T1 IPR005829 Sugar transporter, conserved site, IPR011701 Major facilitator superfamily, IPR020846 Major facilitator superfamily domain, IPR036259 MFS transporter superfamily TRUE
    XANPAGTX0501_006527-T1 IPR000791 Acetate transporter GPR1/FUN34/SatP family FALSE
    XANPAGTX0501_007882-T1 IPR003663 Sugar/inositol transporter, IPR005828 Major facilitator, sugar transporter-like, IPR005829 Sugar transporter, conserved site, IPR020846 Major facilitator superfamily domain, IPR036259 MFS transporter superfamily FALSE
    XANPAGTX0501_007969-T1 IPR003663 Sugar/inositol transporter, IPR005828 Major facilitator, sugar transporter-like, IPR005829 Sugar transporter, conserved site, IPR020846 Major facilitator superfamily domain, IPR036259 MFS transporter superfamily FALSE
    XANPAGTX0501_008850-T1 IPR003439 ABC transporter-like, ATP-binding domain, IPR003593 AAA+ ATPase domain, IPR010929 CDR ABC transporter, IPR013525 ABC-2 type transporter, IPR017871 ABC transporter-like, conserved site, IPR027417 P-loop containing nucleoside triphosphate hydrolase, IPR029481 ABC-transporter, N-terminal domain, IPR034001 ATP-binding cassette transporter, PDR-like subfamily G, domain 1, IPR034003 ATP-binding cassette transporter, PDR-like subfamily G, domain 2 FALSE
    XANPAGTX0501_009192-T1 IPR011701 Major facilitator superfamily, IPR020846 Major facilitator superfamily domain, IPR036259 MFS transporter superfamily FALSE
    XANPAGTX0501_009193-T1 IPR011701 Major facilitator superfamily, IPR020846 Major facilitator superfamily domain, IPR036259 MFS transporter superfamily FALSE
    XANPAGTX0501_009202-T1 IPR011701 Major facilitator superfamily, IPR020846 Major facilitator superfamily domain, IPR036259 MFS transporter superfamily FALSE
    XANPAGTX0501_009320-T1 IPR011701 Major facilitator superfamily, IPR020846 Major facilitator superfamily domain, IPR036259 MFS transporter superfamily FALSE
    XANPAGTX0501_009472-T1 IPR005018 DOMON domain, IPR006593 Cytochrome b561/ferric reductase transmembrane, IPR015920 Cellobiose dehydrogenase, cytochrome domain, IPR018825 Domain of unknown function DUF2427, IPR036259 MFS transporter superfamily, IPR038697 None FALSE
    XANPAGTX0501_009914-T1 IPR036259 MFS transporter superfamily TRUE
    XANPAGTX0501_010175-T1 IPR011701 Major facilitator superfamily, IPR020846 Major facilitator superfamily domain, IPR036259 MFS transporter superfamily FALSE
    XANPAGTX0501_010196-T1 IPR011701 Major facilitator superfamily, IPR020846 Major facilitator superfamily domain, IPR036259 MFS transporter superfamily FALSE
    XANPAGTX0501_010381-T1 IPR002293 Amino acid/polyamine transporter I FALSE
    XANPAGTX0501_010444-T1 IPR001958 Tetracycline resistance protein TetA/multidrug resistance protein MdtG, IPR011701 Major facilitator superfamily, IPR020846 Major facilitator superfamily domain, IPR036259 MFS transporter superfamily TRUE
    XANPAGTX0501_010683-T1 IPR011701 Major facilitator superfamily, IPR020846 Major facilitator superfamily domain, IPR036259 MFS transporter superfamily FALSE
    XANPAGTX0501_010706-T1 IPR003439 ABC transporter-like, ATP-binding domain, IPR003593 AAA+ ATPase domain, IPR011527 ABC transporter type 1, transmembrane domain, IPR017871 ABC transporter-like, conserved site, IPR027417 P-loop containing nucleoside triphosphate hydrolase, IPR036640 ABC transporter type 1, transmembrane domain superfamily FALSE
    • 35 upregulated in lichen, 3 of them (8.6%) from lichen-enriched orthogroups
    transport_cul<-funannot2 %>% left_join(sig,by=c("TranscriptID"="target_id")) %>%
      filter(grepl("transport",InterPro_new),change=="culture") %>% select(TranscriptID,InterPro_new,lichen_ortho)
    transport_lic %>% 
      kable(format = "html", col.names = c("TranscriptID","InterPro_new","lichen_ortho")) %>%
      kable_styling() %>%
      kableExtra::scroll_box(width = "100%", height = "600px")
    TranscriptID InterPro_new lichen_ortho
    XANPAGTX0501_000142-T1 IPR011701 Major facilitator superfamily, IPR036259 MFS transporter superfamily FALSE
    XANPAGTX0501_000342-T1 IPR002293 Amino acid/polyamine transporter I FALSE
    XANPAGTX0501_000342-T2 IPR002293 Amino acid/polyamine transporter I FALSE
    XANPAGTX0501_000684-T1 IPR036259 MFS transporter superfamily TRUE
    XANPAGTX0501_000704-T1 IPR011701 Major facilitator superfamily, IPR020846 Major facilitator superfamily domain, IPR036259 MFS transporter superfamily FALSE
    XANPAGTX0501_001082-T1 IPR004853 Sugar phosphate transporter domain FALSE
    XANPAGTX0501_001448-T1 IPR011701 Major facilitator superfamily, IPR036259 MFS transporter superfamily FALSE
    XANPAGTX0501_001653-T1 IPR003663 Sugar/inositol transporter, IPR005828 Major facilitator, sugar transporter-like, IPR005829 Sugar transporter, conserved site, IPR020846 Major facilitator superfamily domain, IPR036259 MFS transporter superfamily TRUE
    XANPAGTX0501_002486-T1 IPR011701 Major facilitator superfamily, IPR020846 Major facilitator superfamily domain, IPR036259 MFS transporter superfamily TRUE
    XANPAGTX0501_003065-T3 IPR005178 Organic solute transporter subunit alpha/Transmembrane protein 184 FALSE
    XANPAGTX0501_003337-T2 IPR004738 Phosphate permease, IPR005828 Major facilitator, sugar transporter-like, IPR005829 Sugar transporter, conserved site, IPR020846 Major facilitator superfamily domain, IPR036259 MFS transporter superfamily FALSE
    XANPAGTX0501_004383-T1 IPR011701 Major facilitator superfamily, IPR020846 Major facilitator superfamily domain, IPR036259 MFS transporter superfamily TRUE
    XANPAGTX0501_004428-T1 IPR004695 Transporter protein SLAC1/Mae1/ Ssu1/TehA, IPR030185 Malic acid transport protein, IPR038665 Voltage-dependent anion channel superfamily FALSE
    XANPAGTX0501_004624-T1 IPR004737 Nitrate transporter NarK/NarU-like, IPR011701 Major facilitator superfamily, IPR020846 Major facilitator superfamily domain, IPR036259 MFS transporter superfamily FALSE
    XANPAGTX0501_004972-T1 IPR001905 Ammonium transporter, IPR018047 Ammonium transporter, conserved site, IPR024041 Ammonium transporter AmtB-like domain, IPR029020 Ammonium/urea transporter FALSE
    XANPAGTX0501_004996-T1 IPR002293 Amino acid/polyamine transporter I FALSE
    XANPAGTX0501_005027-T1 IPR003370 Chromate transporter, IPR014047 Chromate transporter, long chain FALSE
    XANPAGTX0501_005378-T1 IPR002293 Amino acid/polyamine transporter I FALSE
    XANPAGTX0501_005489-T1 IPR036259 MFS transporter superfamily TRUE
    XANPAGTX0501_005571-T1 IPR003663 Sugar/inositol transporter, IPR005828 Major facilitator, sugar transporter-like, IPR020846 Major facilitator superfamily domain, IPR036259 MFS transporter superfamily TRUE
    XANPAGTX0501_005572-T1 IPR005828 Major facilitator, sugar transporter-like, IPR020846 Major facilitator superfamily domain, IPR036259 MFS transporter superfamily TRUE
    XANPAGTX0501_005708-T1 IPR036259 MFS transporter superfamily TRUE
    XANPAGTX0501_006293-T1 IPR001958 Tetracycline resistance protein TetA/multidrug resistance protein MdtG, IPR011701 Major facilitator superfamily, IPR020846 Major facilitator superfamily domain, IPR036259 MFS transporter superfamily TRUE
    XANPAGTX0501_006324-T1 IPR001204 Phosphate transporter FALSE
    XANPAGTX0501_006367-T1 IPR005829 Sugar transporter, conserved site, IPR011701 Major facilitator superfamily, IPR020846 Major facilitator superfamily domain, IPR036259 MFS transporter superfamily TRUE
    XANPAGTX0501_006527-T1 IPR000791 Acetate transporter GPR1/FUN34/SatP family FALSE
    XANPAGTX0501_007882-T1 IPR003663 Sugar/inositol transporter, IPR005828 Major facilitator, sugar transporter-like, IPR005829 Sugar transporter, conserved site, IPR020846 Major facilitator superfamily domain, IPR036259 MFS transporter superfamily FALSE
    XANPAGTX0501_007969-T1 IPR003663 Sugar/inositol transporter, IPR005828 Major facilitator, sugar transporter-like, IPR005829 Sugar transporter, conserved site, IPR020846 Major facilitator superfamily domain, IPR036259 MFS transporter superfamily FALSE
    XANPAGTX0501_008850-T1 IPR003439 ABC transporter-like, ATP-binding domain, IPR003593 AAA+ ATPase domain, IPR010929 CDR ABC transporter, IPR013525 ABC-2 type transporter, IPR017871 ABC transporter-like, conserved site, IPR027417 P-loop containing nucleoside triphosphate hydrolase, IPR029481 ABC-transporter, N-terminal domain, IPR034001 ATP-binding cassette transporter, PDR-like subfamily G, domain 1, IPR034003 ATP-binding cassette transporter, PDR-like subfamily G, domain 2 FALSE
    XANPAGTX0501_009192-T1 IPR011701 Major facilitator superfamily, IPR020846 Major facilitator superfamily domain, IPR036259 MFS transporter superfamily FALSE
    XANPAGTX0501_009193-T1 IPR011701 Major facilitator superfamily, IPR020846 Major facilitator superfamily domain, IPR036259 MFS transporter superfamily FALSE
    XANPAGTX0501_009202-T1 IPR011701 Major facilitator superfamily, IPR020846 Major facilitator superfamily domain, IPR036259 MFS transporter superfamily FALSE
    XANPAGTX0501_009320-T1 IPR011701 Major facilitator superfamily, IPR020846 Major facilitator superfamily domain, IPR036259 MFS transporter superfamily FALSE
    XANPAGTX0501_009472-T1 IPR005018 DOMON domain, IPR006593 Cytochrome b561/ferric reductase transmembrane, IPR015920 Cellobiose dehydrogenase, cytochrome domain, IPR018825 Domain of unknown function DUF2427, IPR036259 MFS transporter superfamily, IPR038697 None FALSE
    XANPAGTX0501_009914-T1 IPR036259 MFS transporter superfamily TRUE
    XANPAGTX0501_010175-T1 IPR011701 Major facilitator superfamily, IPR020846 Major facilitator superfamily domain, IPR036259 MFS transporter superfamily FALSE
    XANPAGTX0501_010196-T1 IPR011701 Major facilitator superfamily, IPR020846 Major facilitator superfamily domain, IPR036259 MFS transporter superfamily FALSE
    XANPAGTX0501_010381-T1 IPR002293 Amino acid/polyamine transporter I FALSE
    XANPAGTX0501_010444-T1 IPR001958 Tetracycline resistance protein TetA/multidrug resistance protein MdtG, IPR011701 Major facilitator superfamily, IPR020846 Major facilitator superfamily domain, IPR036259 MFS transporter superfamily TRUE
    XANPAGTX0501_010683-T1 IPR011701 Major facilitator superfamily, IPR020846 Major facilitator superfamily domain, IPR036259 MFS transporter superfamily FALSE
    XANPAGTX0501_010706-T1 IPR003439 ABC transporter-like, ATP-binding domain, IPR003593 AAA+ ATPase domain, IPR011527 ABC transporter type 1, transmembrane domain, IPR017871 ABC transporter-like, conserved site, IPR027417 P-loop containing nucleoside triphosphate hydrolase, IPR036640 ABC transporter type 1, transmembrane domain superfamily FALSE

    4.8. All secreted proteins

    • Visualized all 608 putative secreted proteins
    secreted<-read.delim2("../../10_lichen_effectors/analysis_and_temp_files/01_predicting_effectors/all_secreted_list.txt",header=F)
    secreted$TranscriptID<-str_replace(secreted$V1,"FUN","XANPAGTX0501")
    
    png("../analysis_and_temp_files/08_dge_culture_lichen/heatmap_secreted_all.png",res=300,width=2000,height=1500)
    plot_transcript_heatmap(so, transcripts=secreted$TranscriptID,
                            cluster_transcripts=T,show_rownames=F,
                            clustering_callback = callback)
    dev.off()
    ## pdf 
    ##   4
    knitr::include_graphics("../analysis_and_temp_files/08_dge_culture_lichen/heatmap_secreted_all.png") 

    • Of them, 154 (25.3%) are upregulated in lichen and 40 in culture (6.6%)
    secreted_sig<-sig %>% filter(target_id %in% secreted$TranscriptID)
    secreted_sig %>% group_by(change) %>% summarise(n=n())
    ## # A tibble: 3 × 2
    ##   change        n
    ##   <chr>     <int>
    ## 1 culture      40
    ## 2 lichen      154
    ## 3 low_logFC   192
    • In lichen, secreted proteins make 0.1299578 of upregulated genes
      • compared to 0.0543585 across the whole genome
      • the number for upregulated in culture is 0.070922
    • Visualize only DGE secreted proteins
    secreted<-read.delim2("../../10_lichen_effectors/analysis_and_temp_files/01_predicting_effectors/all_secreted_list.txt",header=F)
    secreted$TranscriptID<-str_replace(secreted$V1,"FUN","XANPAGTX0501")
    
    png("../analysis_and_temp_files/08_dge_culture_lichen/heatmap_secreted_dge.png",res=300,width=2000,height=1500)
    plot_transcript_heatmap(so, transcripts=secreted_sig$target_id[secreted_sig$change!="low_logFC"],
                            cluster_transcripts=T,show_rownames=F,
                            clustering_callback = callback)
    dev.off()
    ## pdf 
    ##   4
    knitr::include_graphics("../analysis_and_temp_files/08_dge_culture_lichen/heatmap_secreted_dge.png") 

    • All secreted proteins upregulated in lichen. There are a bunch of lectins there, GHs and other hydrolases, and a killer toxin
      • Notably, many genes classified as non-effectors have functinal annotations similar to those classified as effectors???
      • Most strongly upregulated are a GH128 (beta-1,3-glucanase / laminarinase), lectins, and a toxin
    secreted_table_lichen<-secreted_sig %>% 
      filter(change=="lichen") %>% left_join(funannot2,by = c("target_id"="TranscriptID")) %>% select(target_id,b,Product,CAZyme_new,COG_new,EggNog_new,GO.Terms_new,InterPro_new,PFAM_new,Protease_new,KO,lichen_ortho) %>% mutate(effector_from_small_list=ifelse(target_id %in% eff_list$TranscriptID,T,F))%>%
      arrange(b)
    secreted_table_lichen %>%
      kable(format = "html", col.names = colnames(secreted_table_lichen)) %>%
      kable_styling() %>%
      kableExtra::scroll_box(width = "100%", height = "600px")
    target_id b Product CAZyme_new COG_new EggNog_new GO.Terms_new InterPro_new PFAM_new Protease_new KO lichen_ortho effector_from_small_list
    XANPAGTX0501_001127-T1 -6.214566 hypothetical protein GH128 NA NA NA IPR008979 Galactose-binding-like domain superfamily, IPR017853 Glycoside hydrolase superfamily, IPR024655 Asl1-like, glycosyl hydrolase catalytic domain PF11790 NA FALSE FALSE
    XANPAGTX0501_000655-T1 -6.186697 hypothetical protein NA NA NA NA IPR035992 Ricin B-like lectins NA NA FALSE FALSE
    XANPAGTX0501_009887-T1 -6.152645 hypothetical protein NA NA ENOG503P63W NA NA NA NA FALSE FALSE
    XANPAGTX0501_007865-T1 -5.732812 hypothetical protein NA NA NA NA NA NA NA FALSE FALSE
    XANPAGTX0501_005533-T1 -5.599048 hypothetical protein NA NA NA NA NA NA NA TRUE FALSE
    XANPAGTX0501_010170-T1 -5.061039 hypothetical protein NA NA NA NA IPR000772 Ricin B, lectin domain, IPR035992 Ricin B-like lectins NA NA TRUE FALSE
    XANPAGTX0501_001063-T1 -4.589396 hypothetical protein NA NA NA NA NA NA NA FALSE TRUE
    XANPAGTX0501_009135-T1 -4.527295 hypothetical protein NA NA NA GO_component: GO:0005576 - extracellular region [Evidence IEA], GO_process: GO:0009405 - pathogenesis [Evidence IEA] IPR011329 Killer toxin Kp4/SMK, IPR015131 Killer toxin, Kp4 PF09044 NA FALSE FALSE
    XANPAGTX0501_005364-T1 -4.400832 hypothetical protein NA NA NA NA NA NA NA FALSE TRUE
    XANPAGTX0501_000656-T1 -4.357565 hypothetical protein NA NA NA NA IPR000772 Ricin B, lectin domain, IPR035992 Ricin B-like lectins NA NA TRUE TRUE
    XANPAGTX0501_002409-T1 -4.088717 hypothetical protein NA NA NA GO_component: GO:0009277 - fungal-type cell wall [Evidence IEA], GO_function: GO:0005199 - structural constituent of cell wall [Evidence IEA] IPR001338 Hydrophobin NA NA TRUE TRUE
    XANPAGTX0501_009189-T1 -4.014594 hypothetical protein NA NA NA NA NA NA NA TRUE TRUE
    XANPAGTX0501_001935-T1 -3.959675 hypothetical protein NA NA NA NA NA NA NA FALSE FALSE
    XANPAGTX0501_006245-T1 -3.929116 hypothetical protein NA NA NA NA NA NA NA TRUE TRUE
    XANPAGTX0501_005061-T1 -3.850853 hypothetical protein NA S:(S) Function unknown ENOG503NVBA NA NA PF13668 NA FALSE FALSE
    XANPAGTX0501_001856-T1 -3.823871 hypothetical protein NA NA ENOG503P50I NA IPR037176 Osmotin/thaumatin-like superfamily NA NA TRUE TRUE
    XANPAGTX0501_006282-T1 -3.776836 hypothetical protein NA NA NA NA NA NA NA FALSE FALSE
    XANPAGTX0501_004309-T1 -3.720276 hypothetical protein NA NA NA NA NA NA NA FALSE FALSE
    XANPAGTX0501_004881-T1 -3.693406 hypothetical protein NA NA NA NA NA NA NA FALSE FALSE
    XANPAGTX0501_001015-T1 -3.506517 hypothetical protein NA S:(S) Function unknown ENOG503P58T, ENOG503P74E, ENOG503P76M GO_process: GO:0042742 - defense response to bacterium [Evidence IEA], GO_process: GO:0050832 - defense response to fungus [Evidence IEA] IPR001153 Barwin domain, IPR009009 RlpA-like protein, double-psi beta-barrel domain, IPR036908 RlpA-like domain superfamily PF00967, PF03330 NA FALSE TRUE
    XANPAGTX0501_010044-T1 -3.501860 hypothetical protein NA Q:(Q) Secondary metabolites biosynthesis, transport and catabolism ENOG503NTXA GO_function: GO:0016491 - oxidoreductase activity [Evidence IEA], GO_process: GO:0055114 - oxidation-reduction process [Evidence IEA] IPR002937 Amine oxidase, IPR036188 FAD/NAD(P)-binding domain superfamily PF13450 NA FALSE FALSE
    XANPAGTX0501_009317-T1 -3.484947 hypothetical protein NA NA NA NA IPR008979 Galactose-binding-like domain superfamily NA NA FALSE FALSE
    XANPAGTX0501_009340-T1 -3.459083 hypothetical protein NA NA NA NA NA NA NA FALSE FALSE
    XANPAGTX0501_006327-T1 -3.436352 hypothetical protein NA NA NA NA IPR018466 Yeast cell wall synthesis Kre9/Knh1-like, N-terminal PF10342 NA FALSE FALSE
    XANPAGTX0501_004930-T1 -3.341143 hypothetical protein NA NA NA NA NA NA NA FALSE FALSE
    XANPAGTX0501_002446-T1 -3.286528 hypothetical protein NA NA NA NA NA NA NA FALSE FALSE
    XANPAGTX0501_003765-T1 -3.256318 hypothetical protein NA NA NA GO_component: GO:0005618 - cell wall [Evidence IEA], GO_function: GO:0005199 - structural constituent of cell wall [Evidence IEA] IPR000420 Yeast PIR protein repeat PF00399 NA FALSE FALSE
    XANPAGTX0501_005256-T1 -3.224319 hypothetical protein AA7 C:(C) Energy production and conversion ENOG503NV4F GO_function: GO:0016491 - oxidoreductase activity [Evidence IEA], GO_function: GO:0050660 - flavin adenine dinucleotide binding [Evidence IEA], GO_function: GO:0071949 - FAD binding [Evidence IEA], GO_process: GO:0055114 - oxidation-reduction process [Evidence IEA] IPR006094 FAD linked oxidase, N-terminal, IPR012951 Berberine/berberine-like, IPR016166 FAD-binding domain, PCMH-type, IPR036318 FAD-binding, type PCMH-like superfamily PF01565, PF08031 NA TRUE FALSE
    XANPAGTX0501_006846-T1 -3.098928 hypothetical protein NA NA NA NA NA NA NA FALSE FALSE
    XANPAGTX0501_010449-T1 -2.870515 hypothetical protein NA O:(O) Posttranslational modification, protein turnover, chaperones ENOG503P0GD GO_function: GO:0004222 - metalloendopeptidase activity [Evidence IEA], GO_function: GO:0008237 - metallopeptidase activity [Evidence IEA], GO_process: GO:0006508 - proteolysis [Evidence IEA] IPR001384 Peptidase M35, deuterolysin, IPR024079 Metallopeptidase, catalytic domain superfamily, IPR029463 Lysine-specific metallo-endopeptidase PF02102, PF14521 M35 FALSE FALSE
    XANPAGTX0501_009688-T1 -2.868656 hypothetical protein NA NA NA GO_component: GO:0005576 - extracellular region [Evidence IEA], GO_function: GO:0030248 - cellulose binding [Evidence IEA], GO_process: GO:0005975 - carbohydrate metabolic process [Evidence IEA] IPR000254 Cellulose-binding domain, fungal, IPR035971 Cellulose-binding domain superfamily, IPR036908 RlpA-like domain superfamily PF00734 NA FALSE TRUE
    XANPAGTX0501_003270-T1 -2.857225 hypothetical protein NA NA NA NA NA NA NA FALSE FALSE
    XANPAGTX0501_004956-T1 -2.837976 hypothetical protein NA NA NA NA NA NA NA FALSE FALSE
    XANPAGTX0501_005675-T1 -2.825087 hypothetical protein NA O:(O) Posttranslational modification, protein turnover, chaperones ENOG503NZAM GO_function: GO:0008237 - metallopeptidase activity [Evidence IEA] IPR008754 Peptidase M43, pregnancy-associated plasma-A, IPR024079 Metallopeptidase, catalytic domain superfamily PF05572, PF13688 M43B FALSE TRUE
    XANPAGTX0501_000163-T1 -2.730902 hypothetical protein NA S:(S) Function unknown ENOG503NXZU NA IPR009078 Ferritin-like superfamily PF13668 NA TRUE FALSE
    XANPAGTX0501_009886-T1 -2.690801 hypothetical protein NA NA NA NA NA NA NA FALSE TRUE
    XANPAGTX0501_009668-T1 -2.648950 hypothetical protein NA NA ENOG503P6EY NA NA NA NA FALSE FALSE
    XANPAGTX0501_001304-T1 -2.616995 hypothetical protein NA NA NA NA NA NA NA FALSE FALSE
    XANPAGTX0501_006142-T1 -2.552912 hypothetical protein NA NA NA NA NA NA NA TRUE TRUE
    XANPAGTX0501_003572-T1 -2.535693 hypothetical protein CE16 S:(S) Function unknown ENOG503P02M GO_function: GO:0016788 - hydrolase activity, acting on ester bonds [Evidence IEA] IPR001087 GDSL lipase/esterase, IPR036514 SGNH hydrolase superfamily PF00657 NA FALSE FALSE
    XANPAGTX0501_009606-T1 -2.529422 hypothetical protein NA NA ENOG503PMNB NA IPR011024 Gamma-crystallin-like NA NA FALSE TRUE
    XANPAGTX0501_006291-T1 -2.524612 hypothetical protein NA NA NA NA NA NA NA FALSE TRUE
    XANPAGTX0501_008978-T1 -2.518736 hypothetical protein NA NA NA NA NA NA NA FALSE FALSE
    XANPAGTX0501_005573-T1 -2.506069 hypothetical protein NA NA NA NA NA NA NA FALSE TRUE
    XANPAGTX0501_001458-T1 -2.475919 hypothetical protein NA G:(G) Carbohydrate transport and metabolism, O:(O) Posttranslational modification, protein turnover, chaperones, S:(S) Function unknown ENOG503NVEA, ENOG503P073 NA IPR002889 Carbohydrate-binding WSC, IPR018535 Domain of unknown function DUF1996 PF01822, PF09362 NA FALSE FALSE
    XANPAGTX0501_005361-T1 -2.474153 hypothetical protein NA NA NA NA NA NA NA FALSE FALSE
    XANPAGTX0501_007976-T1 -2.346686 hypothetical protein NA NA NA NA NA NA NA FALSE FALSE
    XANPAGTX0501_009683-T1 -2.345949 hypothetical protein NA NA NA NA NA NA NA FALSE FALSE
    XANPAGTX0501_005733-T1 -2.302949 hypothetical protein NA NA NA NA NA NA NA TRUE FALSE
    XANPAGTX0501_010693-T1 -2.297042 hypothetical protein AA7 C:(C) Energy production and conversion ENOG503NVX6 GO_function: GO:0016491 - oxidoreductase activity [Evidence IEA], GO_function: GO:0050660 - flavin adenine dinucleotide binding [Evidence IEA], GO_function: GO:0071949 - FAD binding [Evidence IEA], GO_process: GO:0055114 - oxidation-reduction process [Evidence IEA] IPR006094 FAD linked oxidase, N-terminal, IPR016166 FAD-binding domain, PCMH-type, IPR016167 FAD-binding, type PCMH, subdomain 1, IPR016169 FAD-binding, type PCMH, subdomain 2, IPR036318 FAD-binding, type PCMH-like superfamily PF01565 NA TRUE FALSE
    XANPAGTX0501_007272-T1 -2.264531 hypothetical protein NA NA NA NA NA NA NA FALSE FALSE
    XANPAGTX0501_002412-T1 -2.245791 hypothetical protein NA NA NA NA NA NA NA FALSE FALSE
    XANPAGTX0501_010247-T1 -2.240976 hypothetical protein NA NA NA NA NA NA NA TRUE FALSE
    XANPAGTX0501_007515-T1 -2.210135 hypothetical protein NA O:(O) Posttranslational modification, protein turnover, chaperones ENOG503NUDE, ENOG503NV31 GO_function: GO:0004190 - aspartic-type endopeptidase activity [Evidence IEA], GO_process: GO:0006508 - proteolysis [Evidence IEA] IPR001461 Aspartic peptidase A1 family, IPR001969 Aspartic peptidase, active site, IPR021109 Aspartic peptidase domain superfamily, IPR033121 Peptidase family A1 domain, IPR034163 Aspergillopepsin-like catalytic domain PF00026 A01A FALSE FALSE
    XANPAGTX0501_005771-T1 -2.208725 hypothetical protein GH5 G:(G) Carbohydrate transport and metabolism ENOG503NYXZ GO_function: GO:0004553 - hydrolase activity, hydrolyzing O-glycosyl compounds [Evidence IEA], GO_process: GO:0005975 - carbohydrate metabolic process [Evidence IEA] IPR001547 Glycoside hydrolase, family 5, IPR017853 Glycoside hydrolase superfamily PF00150 NA FALSE FALSE
    XANPAGTX0501_007362-T1 -2.200339 hypothetical protein NA NA NA NA NA NA NA FALSE FALSE
    XANPAGTX0501_010580-T1 -2.189884 hypothetical protein NA G:(G) Carbohydrate transport and metabolism ENOG501RPBK GO_function: GO:0005515 - protein binding [Evidence IEA] IPR011048 Cytochrome cd1-nitrite reductase-like, haem d1 domain superfamily, IPR015943 WD40/YVTN repeat-like-containing domain superfamily, IPR019405 Lactonase, 7-bladed beta propeller PF10282 NA FALSE FALSE
    XANPAGTX0501_004978-T1 -2.183515 hypothetical protein NA NA NA NA NA NA NA FALSE FALSE
    XANPAGTX0501_001708-T1 -2.176671 hypothetical protein NA NA NA NA NA NA NA TRUE FALSE
    XANPAGTX0501_009730-T1 -2.155593 hypothetical protein NA NA NA NA NA NA NA FALSE FALSE
    XANPAGTX0501_009615-T1 -2.152340 hypothetical protein NA NA NA NA IPR035940 CAP superfamily NA NA FALSE FALSE
    XANPAGTX0501_003873-T1 -2.109258 hypothetical protein GH25 G:(G) Carbohydrate transport and metabolism ENOG503P1T2 GO_function: GO:0003796 - lysozyme activity [Evidence IEA], GO_process: GO:0009253 - peptidoglycan catabolic process [Evidence IEA], GO_process: GO:0016998 - cell wall macromolecule catabolic process [Evidence IEA] IPR002053 Glycoside hydrolase, family 25, IPR017853 Glycoside hydrolase superfamily, IPR018077 Glycoside hydrolase, family 25 subgroup PF01183 NA FALSE FALSE
    XANPAGTX0501_004880-T1 -2.100330 hypothetical protein NA NA NA NA NA NA NA FALSE FALSE
    XANPAGTX0501_002331-T1 -2.094181 hypothetical protein GH5 G:(G) Carbohydrate transport and metabolism ENOG503NY10 GO_function: GO:0004553 - hydrolase activity, hydrolyzing O-glycosyl compounds [Evidence IEA], GO_process: GO:0005975 - carbohydrate metabolic process [Evidence IEA] IPR001547 Glycoside hydrolase, family 5, IPR017853 Glycoside hydrolase superfamily, IPR018087 Glycoside hydrolase, family 5, conserved site PF00150 NA FALSE FALSE
    XANPAGTX0501_007467-T1 -2.047189 hypothetical protein CE3 G:(G) Carbohydrate transport and metabolism, S:(S) Function unknown ENOG503NXZG, ENOG503P4E6 NA IPR013830 SGNH hydrolase-type esterase domain, IPR036514 SGNH hydrolase superfamily PF00657, PF13472, PF13517 NA FALSE FALSE
    XANPAGTX0501_008041-T1 -2.018648 hypothetical protein NA NA NA NA NA NA NA FALSE FALSE
    XANPAGTX0501_010250-T1 -1.956382 hypothetical protein NA NA ENOG503P3M4 NA NA NA NA FALSE TRUE
    XANPAGTX0501_004329-T1 -1.953200 hypothetical protein NA NA NA NA IPR020915 Uncharacterised protein family UPF0311 PF11578 NA TRUE FALSE
    XANPAGTX0501_008875-T1 -1.934027 hypothetical protein NA NA NA NA NA NA NA FALSE TRUE
    XANPAGTX0501_009699-T1 -1.923092 hypothetical protein GH16 G:(G) Carbohydrate transport and metabolism, O:(O) Posttranslational modification, protein turnover, chaperones ENOG503NZM7, ENOG503PDKW GO_function: GO:0004553 - hydrolase activity, hydrolyzing O-glycosyl compounds [Evidence IEA], GO_function: GO:0008061 - chitin binding [Evidence IEA], GO_process: GO:0005975 - carbohydrate metabolic process [Evidence IEA] IPR000757 Glycoside hydrolase family 16, IPR001002 Chitin-binding, type 1, IPR008264 Beta-glucanase, IPR013320 Concanavalin A-like lectin/glucanase domain superfamily, IPR036861 Endochitinase-like superfamily PF00722 NA FALSE FALSE
    XANPAGTX0501_010470-T1 -1.921148 hypothetical protein GH24 O:(O) Posttranslational modification, protein turnover, chaperones ENOG503P2E0, ENOG503P2RE GO_function: GO:0003796 - lysozyme activity [Evidence IEA], GO_process: GO:0009253 - peptidoglycan catabolic process [Evidence IEA], GO_process: GO:0016998 - cell wall macromolecule catabolic process [Evidence IEA], GO_process: GO:0019076 - viral release from host cell [Evidence IEA], GO_process: GO:0019835 - cytolysis [Evidence IEA] IPR002196 Glycoside hydrolase, family 24, IPR009045 Hedgehog signalling/DD-peptidase zinc-binding domain superfamily, IPR023346 Lysozyme-like domain superfamily, IPR033907 Endolysin/autolysin, IPR034690 Endolysin T4 type PF00959 NA K01185 FALSE TRUE
    XANPAGTX0501_000609-T1 -1.909619 hypothetical protein NA NA NA NA NA NA NA TRUE FALSE
    XANPAGTX0501_006349-T1 -1.876403 hypothetical protein NA NA NA NA NA NA NA FALSE FALSE
    XANPAGTX0501_000051-T1 -1.859342 hypothetical protein NA NA NA NA NA NA NA TRUE FALSE
    XANPAGTX0501_002665-T1 -1.843112 hypothetical protein NA NA NA GO_function: GO:0008237 - metallopeptidase activity [Evidence IEA] IPR024079 Metallopeptidase, catalytic domain superfamily NA NA TRUE FALSE
    XANPAGTX0501_001832-T1 -1.829702 hypothetical protein GH128 NA NA NA IPR017853 Glycoside hydrolase superfamily, IPR024655 Asl1-like, glycosyl hydrolase catalytic domain PF11790 NA TRUE FALSE
    XANPAGTX0501_004372-T1 -1.826518 hypothetical protein NA G:(G) Carbohydrate transport and metabolism, T:(T) Signal transduction mechanisms ENOG503NV5A, ENOG503P0DZ NA IPR002018 Carboxylesterase, type B, IPR019819 Carboxylesterase type B, conserved site, IPR019826 Carboxylesterase type B, active site, IPR029058 Alpha/Beta hydrolase fold PF00135, PF20434 S09X FALSE FALSE
    XANPAGTX0501_010022-T1 -1.809156 hypothetical protein NA NA NA NA NA NA NA FALSE FALSE
    XANPAGTX0501_000923-T1 -1.786540 hypothetical protein NA P:(P) Inorganic ion transport and metabolism ENOG503NV06 GO_function: GO:0003993 - acid phosphatase activity [Evidence IEA], GO_function: GO:0046872 - metal ion binding [Evidence IEA] IPR008963 Purple acid phosphatase-like, N-terminal, IPR018946 PhoD-like phosphatase, metallophosphatase domain, IPR032093 Phospholipase D, N-terminal, IPR038607 PhoD-like superfamily PF09423, PF16655 NA K01113 FALSE FALSE
    XANPAGTX0501_003361-T1 -1.748190 hypothetical protein NA NA ENOG503P519 NA NA NA NA FALSE FALSE
    XANPAGTX0501_009511-T1 -1.745629 hypothetical protein GH16 G:(G) Carbohydrate transport and metabolism, O:(O) Posttranslational modification, protein turnover, chaperones ENOG503NZM7, ENOG503PDKW GO_function: GO:0004553 - hydrolase activity, hydrolyzing O-glycosyl compounds [Evidence IEA], GO_function: GO:0008061 - chitin binding [Evidence IEA], GO_process: GO:0005975 - carbohydrate metabolic process [Evidence IEA] IPR000757 Glycoside hydrolase family 16, IPR001002 Chitin-binding, type 1, IPR008264 Beta-glucanase, IPR013320 Concanavalin A-like lectin/glucanase domain superfamily, IPR036861 Endochitinase-like superfamily PF00722 NA K01216 FALSE FALSE
    XANPAGTX0501_008019-T1 -1.720017 hypothetical protein NA NA NA NA NA NA NA FALSE FALSE
    XANPAGTX0501_002191-T1 -1.708273 hypothetical protein NA NA NA NA NA NA NA FALSE FALSE
    XANPAGTX0501_006967-T1 -1.686952 hypothetical protein NA NA NA NA NA NA NA TRUE TRUE
    XANPAGTX0501_005715-T1 -1.646345 hypothetical protein AA7 C:(C) Energy production and conversion, O:(O) Posttranslational modification, protein turnover, chaperones ENOG503NXN6, ENOG503P15H GO_function: GO:0016491 - oxidoreductase activity [Evidence IEA], GO_function: GO:0050660 - flavin adenine dinucleotide binding [Evidence IEA], GO_function: GO:0071949 - FAD binding [Evidence IEA], GO_process: GO:0055114 - oxidation-reduction process [Evidence IEA] IPR006094 FAD linked oxidase, N-terminal, IPR012951 Berberine/berberine-like, IPR016166 FAD-binding domain, PCMH-type, IPR036318 FAD-binding, type PCMH-like superfamily PF01565, PF08031 NA FALSE FALSE
    XANPAGTX0501_006283-T1 -1.624636 hypothetical protein NA NA NA NA IPR000772 Ricin B, lectin domain, IPR035992 Ricin B-like lectins NA NA TRUE FALSE
    XANPAGTX0501_008270-T1 -1.619871 hypothetical protein NA NA NA NA NA NA NA FALSE FALSE
    XANPAGTX0501_005641-T1 -1.614221 hypothetical protein NA NA NA NA NA NA NA TRUE FALSE
    XANPAGTX0501_010366-T1 -1.609971 hypothetical protein AA9 O:(O) Posttranslational modification, protein turnover, chaperones ENOG503NYCR, ENOG503Q4YD GO_function: GO:0003676 - nucleic acid binding [Evidence IEA], GO_function: GO:0008061 - chitin binding [Evidence IEA] IPR001002 Chitin-binding, type 1, IPR005103 Auxiliary Activity family 9, IPR013087 Zinc finger C2H2-type, IPR018371 Chitin-binding, type 1, conserved site, IPR036861 Endochitinase-like superfamily PF00187, PF03443 NA FALSE FALSE
    XANPAGTX0501_000467-T1 -1.600285 hypothetical protein NA NA NA NA NA NA NA TRUE FALSE
    XANPAGTX0501_008622-T1 -1.585245 hypothetical protein NA NA NA NA NA NA NA FALSE FALSE
    XANPAGTX0501_004346-T1 -1.568986 hypothetical protein NA S:(S) Function unknown ENOG503NUT8 NA IPR027589 Choice-of-anchor B domain NA NA FALSE FALSE
    XANPAGTX0501_006257-T1 -1.562667 hypothetical protein NA G:(G) Carbohydrate transport and metabolism ENOG503NW21 GO_function: GO:0004553 - hydrolase activity, hydrolyzing O-glycosyl compounds [Evidence IEA], GO_process: GO:0005975 - carbohydrate metabolic process [Evidence IEA] IPR000490 Glycoside hydrolase family 17, IPR017853 Glycoside hydrolase superfamily PF00332 NA K01210 FALSE FALSE
    XANPAGTX0501_007939-T1 -1.548201 hypothetical protein NA NA NA NA NA NA NA TRUE FALSE
    XANPAGTX0501_010021-T1 -1.539151 hypothetical protein NA NA NA NA NA NA NA FALSE FALSE
    XANPAGTX0501_009026-T1 -1.512207 hypothetical protein NA NA NA NA NA NA NA FALSE TRUE
    XANPAGTX0501_003287-T1 -1.493602 hypothetical protein NA NA NA NA NA NA NA FALSE TRUE
    XANPAGTX0501_002186-T1 -1.483447 hypothetical protein NA NA NA NA NA NA NA FALSE FALSE
    XANPAGTX0501_006194-T1 -1.466337 hypothetical protein NA O:(O) Posttranslational modification, protein turnover, chaperones ENOG503P7ES NA IPR036908 RlpA-like domain superfamily NA NA TRUE TRUE
    XANPAGTX0501_004515-T1 -1.449838 hypothetical protein NA NA NA NA IPR032710 NTF2-like domain superfamily, IPR037401 SnoaL-like domain PF13577 NA FALSE FALSE
    XANPAGTX0501_010040-T1 -1.448217 hypothetical protein NA NA NA NA NA NA NA FALSE FALSE
    XANPAGTX0501_010027-T1 -1.444367 hypothetical protein NA NA NA NA NA NA NA TRUE FALSE
    XANPAGTX0501_009972-T1 -1.430084 hypothetical protein NA O:(O) Posttranslational modification, protein turnover, chaperones ENOG503Q3UA GO_function: GO:0004252 - serine-type endopeptidase activity [Evidence IEA], GO_function: GO:0008236 - serine-type peptidase activity [Evidence IEA], GO_process: GO:0006508 - proteolysis [Evidence IEA] IPR000209 Peptidase S8/S53 domain, IPR015366 Peptidase S53, activation domain, IPR030400 Sedolisin domain, IPR036852 Peptidase S8/S53 domain superfamily PF00082, PF09286 S53 TRUE FALSE
    XANPAGTX0501_001919-T1 -1.411388 hypothetical protein NA NA NA NA NA NA NA FALSE FALSE
    XANPAGTX0501_000162-T1 -1.389788 hypothetical protein NA NA NA NA NA NA NA FALSE FALSE
    XANPAGTX0501_003766-T1 -1.385254 hypothetical protein NA NA NA NA NA NA NA FALSE FALSE
    XANPAGTX0501_003448-T1 -1.383005 hypothetical protein NA NA NA NA NA NA NA TRUE FALSE
    XANPAGTX0501_009936-T1 -1.379267 hypothetical protein AA5, CBM32 O:(O) Posttranslational modification, protein turnover, chaperones, S:(S) Function unknown ENOG502I4WM, ENOG503NY8H GO_function: GO:0005515 - protein binding [Evidence IEA] IPR000421 Coagulation factor 5/8 C-terminal domain, IPR006652 Kelch repeat type 1, IPR008979 Galactose-binding-like domain superfamily, IPR011043 Galactose oxidase/kelch, beta-propeller, IPR013783 Immunoglobulin-like fold, IPR014756 Immunoglobulin E-set, IPR015202 Galactose oxidase-like, Early set domain, IPR037293 Galactose oxidase, central domain superfamily PF00754, PF01344, PF09118 NA FALSE FALSE
    XANPAGTX0501_008146-T1 -1.360688 hypothetical protein NA E:(E) Amino acid transport and metabolism, O:(O) Posttranslational modification, protein turnover, chaperones ENOG503P41K GO_function: GO:0004185 - serine-type carboxypeptidase activity [Evidence IEA], GO_process: GO:0006508 - proteolysis [Evidence IEA] IPR001563 Peptidase S10, serine carboxypeptidase, IPR018202 Serine carboxypeptidase, serine active site, IPR029058 Alpha/Beta hydrolase fold PF00450 S10 K01288 FALSE FALSE
    XANPAGTX0501_010026-T1 -1.349396 hypothetical protein NA S:(S) Function unknown ENOG503P0BZ NA IPR018392 LysM domain, IPR036779 LysM domain superfamily PF01476 NA FALSE FALSE
    XANPAGTX0501_001455-T1 -1.338580 Sperm-associated antigen 4 protein GH132 O:(O) Posttranslational modification, protein turnover, chaperones ENOG503NUJ1 NA IPR005556 SUN family PF03856 NA K01238 FALSE FALSE
    XANPAGTX0501_002315-T2 -1.329521 hypothetical protein NA NA NA NA NA NA NA TRUE FALSE
    XANPAGTX0501_009690-T1 -1.325638 hypothetical protein NA NA NA NA NA NA NA TRUE FALSE
    XANPAGTX0501_007280-T1 -1.317948 hypothetical protein NA NA NA NA NA NA NA TRUE TRUE
    XANPAGTX0501_004983-T1 -1.308098 hypothetical protein NA NA NA NA NA NA NA FALSE FALSE
    XANPAGTX0501_005133-T1 -1.291558 hypothetical protein NA S:(S) Function unknown ENOG503NXPR NA IPR039535 Arylsulfotransferase-like PF05935, PF14269 NA FALSE FALSE
    XANPAGTX0501_000606-T1 -1.268241 hypothetical protein NA NA NA NA NA NA NA FALSE FALSE
    XANPAGTX0501_007268-T1 -1.263293 hypothetical protein AA7 C:(C) Energy production and conversion, S:(S) Function unknown ENOG503NXRX, ENOG503PBAU GO_function: GO:0016491 - oxidoreductase activity [Evidence IEA], GO_function: GO:0050660 - flavin adenine dinucleotide binding [Evidence IEA], GO_function: GO:0071949 - FAD binding [Evidence IEA], GO_process: GO:0055114 - oxidation-reduction process [Evidence IEA] IPR006094 FAD linked oxidase, N-terminal, IPR012951 Berberine/berberine-like, IPR016166 FAD-binding domain, PCMH-type, IPR016167 FAD-binding, type PCMH, subdomain 1, IPR016169 FAD-binding, type PCMH, subdomain 2, IPR036318 FAD-binding, type PCMH-like superfamily PF01565, PF08031 NA TRUE FALSE
    XANPAGTX0501_004065-T1 -1.250886 hypothetical protein NA NA NA NA NA NA NA FALSE FALSE
    XANPAGTX0501_006382-T1 -1.241749 hypothetical protein NA NA ENOG503P53I NA NA NA NA FALSE FALSE
    XANPAGTX0501_005717-T1 -1.235652 hypothetical protein NA NA NA NA NA NA NA FALSE FALSE
    XANPAGTX0501_008990-T1 -1.228845 hypothetical protein NA NA NA NA NA NA NA FALSE FALSE
    XANPAGTX0501_009689-T1 -1.216985 hypothetical protein NA NA NA NA NA NA NA FALSE FALSE
    XANPAGTX0501_009731-T1 -1.215351 hypothetical protein GH71 O:(O) Posttranslational modification, protein turnover, chaperones ENOG503NVTC GO_function: GO:0016787 - hydrolase activity [Evidence IEA] IPR005197 Glycoside hydrolase family 71 PF03659 NA K08254 FALSE FALSE
    XANPAGTX0501_008429-T1 -1.195150 hypothetical protein NA NA NA NA NA NA NA TRUE FALSE
    XANPAGTX0501_001183-T1 -1.191723 hypothetical protein NA NA NA NA NA NA NA FALSE FALSE
    XANPAGTX0501_000737-T1 -1.183761 hypothetical protein NA NA NA NA NA NA NA FALSE FALSE
    XANPAGTX0501_010624-T1 -1.176356 hypothetical protein NA J:(J) Translation, ribosomal structure and biogenesis ENOG503NVR9 NA IPR023631 Amidase signature domain, IPR036928 Amidase signature (AS) superfamily NA NA FALSE FALSE
    XANPAGTX0501_003606-T1 -1.168749 hypothetical protein NA NA NA NA NA NA NA FALSE FALSE
    XANPAGTX0501_005629-T1 -1.163876 hypothetical protein NA S:(S) Function unknown ENOG503NY4Y GO_function: GO:0008081 - phosphoric diester hydrolase activity [Evidence IEA], GO_process: GO:0006629 - lipid metabolic process [Evidence IEA] IPR017946 PLC-like phosphodiesterase, TIM beta/alpha-barrel domain superfamily, IPR030395 Glycerophosphodiester phosphodiesterase domain PF03009 NA FALSE FALSE
    XANPAGTX0501_002565-T1 -1.159972 hypothetical protein NA NA NA NA NA NA NA FALSE FALSE
    XANPAGTX0501_006751-T1 -1.137824 hypothetical protein NA NA NA NA NA NA NA FALSE TRUE
    XANPAGTX0501_010682-T1 -1.134135 hypothetical protein NA NA NA NA NA NA NA TRUE FALSE
    XANPAGTX0501_000697-T1 -1.131109 hypothetical protein NA NA NA NA NA NA NA FALSE FALSE
    XANPAGTX0501_002041-T1 -1.127546 hypothetical protein NA NA NA NA NA NA NA FALSE TRUE
    XANPAGTX0501_006292-T1 -1.107115 hypothetical protein NA NA NA NA NA NA NA FALSE FALSE
    XANPAGTX0501_010443-T1 -1.107115 hypothetical protein NA NA NA NA NA NA NA FALSE FALSE
    XANPAGTX0501_005317-T1 -1.102348 hypothetical protein NA NA NA NA NA NA NA FALSE FALSE
    XANPAGTX0501_002543-T1 -1.098046 hypothetical protein NA NA NA NA NA NA NA FALSE FALSE
    XANPAGTX0501_006860-T1 -1.097166 hypothetical protein NA NA NA NA IPR008979 Galactose-binding-like domain superfamily NA NA FALSE FALSE
    XANPAGTX0501_007655-T1 -1.094363 hypothetical protein NA S:(S) Function unknown ENOG503NV1P GO_function: GO:0008081 - phosphoric diester hydrolase activity [Evidence IEA], GO_process: GO:0006629 - lipid metabolic process [Evidence IEA] IPR017946 PLC-like phosphodiesterase, TIM beta/alpha-barrel domain superfamily NA NA FALSE FALSE
    XANPAGTX0501_001970-T1 -1.089366 hypothetical protein NA NA NA NA NA NA NA FALSE FALSE
    XANPAGTX0501_009607-T1 -1.069439 hypothetical protein NA NA NA NA NA NA NA TRUE FALSE
    XANPAGTX0501_010370-T1 -1.066233 hypothetical protein NA NA NA NA IPR003609 PAN/Apple domain PF00024 NA FALSE FALSE
    XANPAGTX0501_002412-T2 -1.065097 hypothetical protein NA NA NA NA NA NA NA FALSE FALSE
    XANPAGTX0501_003285-T1 -1.064411 hypothetical protein NA O:(O) Posttranslational modification, protein turnover, chaperones ENOG503NV2A GO_function: GO:0008236 - serine-type peptidase activity [Evidence IEA], GO_process: GO:0006508 - proteolysis [Evidence IEA] IPR008758 Peptidase S28, IPR029058 Alpha/Beta hydrolase fold, IPR042269 Serine carboxypeptidase S28, SKS domain NA S28 K09649 FALSE FALSE
    XANPAGTX0501_000434-T1 -1.045784 hypothetical protein NA NA NA NA NA NA NA FALSE FALSE
    XANPAGTX0501_003186-T1 -1.045733 hypothetical protein NA NA NA NA NA NA NA FALSE TRUE
    XANPAGTX0501_009011-T1 -1.036787 hypothetical protein NA NA NA NA NA NA NA TRUE FALSE
    XANPAGTX0501_002514-T1 -1.030316 hypothetical protein NA NA NA NA NA NA NA FALSE FALSE
    XANPAGTX0501_008683-T1 -1.021294 hypothetical protein NA NA NA NA NA NA NA FALSE FALSE
    XANPAGTX0501_009311-T1 -1.019264 hypothetical protein NA NA NA NA IPR008427 Extracellular membrane protein, CFEM domain PF05730 NA TRUE TRUE
    XANPAGTX0501_008027-T1 -1.017996 hypothetical protein NA S:(S) Function unknown ENOG503NZ6V GO_function: GO:0004601 - peroxidase activity [Evidence IEA] IPR000028 Chloroperoxidase, IPR036851 Chloroperoxidase-like superfamily PF01328 NA FALSE FALSE
    XANPAGTX0501_003124-T1 -1.008637 hypothetical protein NA O:(O) Posttranslational modification, protein turnover, chaperones ENOG503P2G1 GO_function: GO:0004190 - aspartic-type endopeptidase activity [Evidence IEA], GO_process: GO:0006508 - proteolysis [Evidence IEA] IPR001461 Aspartic peptidase A1 family, IPR001969 Aspartic peptidase, active site, IPR021109 Aspartic peptidase domain superfamily, IPR033121 Peptidase family A1 domain, IPR034164 Pepsin-like domain PF00026 A01A FALSE FALSE
    • All secreted proteins upregulated in culture, arranged by b value. Mostly enzymes
      • Most strongly upregulated are AA7, AA5, GH128 and galactose-binding enzymes
    secreted_table_culture<-secreted_sig %>% 
      filter(change=="culture") %>% left_join(funannot2,by = c("target_id"="TranscriptID")) %>% select(target_id,b,Product,CAZyme_new,COG_new,EggNog_new,GO.Terms_new,InterPro_new,PFAM_new,Protease_new,KO,lichen_ortho) %>% mutate(effector_from_small_list=ifelse(target_id %in% eff_list$TranscriptID,T,F)) %>%
      arrange(desc(b))
    secreted_table_culture %>%
      kable(format = "html", col.names = colnames(secreted_table_culture)) %>%
      kable_styling() %>%
      kableExtra::scroll_box(width = "100%", height = "600px")
    target_id b Product CAZyme_new COG_new EggNog_new GO.Terms_new InterPro_new PFAM_new Protease_new KO lichen_ortho effector_from_small_list
    XANPAGTX0501_005691-T1 5.859572 hypothetical protein AA7 C:(C) Energy production and conversion ENOG503NVX6 GO_function: GO:0016491 - oxidoreductase activity [Evidence IEA], GO_function: GO:0050660 - flavin adenine dinucleotide binding [Evidence IEA], GO_function: GO:0071949 - FAD binding [Evidence IEA], GO_process: GO:0055114 - oxidation-reduction process [Evidence IEA] IPR006094 FAD linked oxidase, N-terminal, IPR016166 FAD-binding domain, PCMH-type, IPR016167 FAD-binding, type PCMH, subdomain 1, IPR016169 FAD-binding, type PCMH, subdomain 2, IPR036318 FAD-binding, type PCMH-like superfamily PF01565 NA TRUE FALSE
    XANPAGTX0501_005118-T1 5.651733 hypothetical protein NA NA ENOG503PWV2, ENOG503PYSR NA NA NA NA TRUE TRUE
    XANPAGTX0501_006848-T1 3.980717 hypothetical protein NA NA NA NA NA NA NA FALSE TRUE
    XANPAGTX0501_008294-T1 3.732327 hypothetical protein AA5, CBM32 O:(O) Posttranslational modification, protein turnover, chaperones, S:(S) Function unknown ENOG502I4WM, ENOG503NY8H GO_function: GO:0005515 - protein binding [Evidence IEA] IPR000421 Coagulation factor 5/8 C-terminal domain, IPR006652 Kelch repeat type 1, IPR008979 Galactose-binding-like domain superfamily, IPR011043 Galactose oxidase/kelch, beta-propeller, IPR013783 Immunoglobulin-like fold, IPR014756 Immunoglobulin E-set, IPR015202 Galactose oxidase-like, Early set domain, IPR037293 Galactose oxidase, central domain superfamily PF00754, PF01344, PF09118 NA FALSE FALSE
    XANPAGTX0501_010474-T1 3.724444 hypothetical protein NA NA NA NA IPR008979 Galactose-binding-like domain superfamily NA NA FALSE TRUE
    XANPAGTX0501_003863-T2 3.301034 hypothetical protein NA NA NA NA NA NA NA FALSE FALSE
    XANPAGTX0501_000306-T1 3.231592 hypothetical protein NA S:(S) Function unknown ENOG503P21A NA IPR021851 Protein of unknown function DUF3455 PF11937 NA FALSE FALSE
    XANPAGTX0501_006869-T1 3.075100 hypothetical protein GH128 NA NA NA IPR008979 Galactose-binding-like domain superfamily, IPR017853 Glycoside hydrolase superfamily, IPR024655 Asl1-like, glycosyl hydrolase catalytic domain PF11790 NA FALSE FALSE
    XANPAGTX0501_005759-T1 2.880832 hypothetical protein NA NA ENOG503P558 NA NA NA NA FALSE FALSE
    XANPAGTX0501_009761-T1 2.701300 hypothetical protein NA S:(S) Function unknown ENOG503NZQD GO_component: GO:0005576 - extracellular region [Evidence IEA] IPR001283 Cysteine-rich secretory protein-related, IPR014044 CAP domain, IPR018244 Allergen V5/Tpx-1-related, conserved site, IPR034120 None, IPR035940 CAP superfamily PF00188 NA K20412 FALSE FALSE
    XANPAGTX0501_000850-T1 2.534878 hypothetical protein NA NA ENOG503P7C3 NA NA NA NA FALSE TRUE
    XANPAGTX0501_006926-T1 2.528494 hypothetical protein NA NA NA NA NA NA NA TRUE FALSE
    XANPAGTX0501_009613-T1 2.515673 hypothetical protein NA S:(S) Function unknown ENOG503NZZ0 NA IPR000073 Alpha/beta hydrolase fold-1, IPR029058 Alpha/Beta hydrolase fold PF12697 NA TRUE FALSE
    XANPAGTX0501_010126-T2 2.407552 hypothetical protein NA NA NA GO_function: GO:0008237 - metallopeptidase activity [Evidence IEA] IPR024079 Metallopeptidase, catalytic domain superfamily NA NA FALSE FALSE
    XANPAGTX0501_009170-T1 2.253664 hypothetical protein GH45 G:(G) Carbohydrate transport and metabolism ENOG503P08U, ENOG503Q3X3 GO_function: GO:0008810 - cellulase activity [Evidence IEA], GO_process: GO:0005975 - carbohydrate metabolic process [Evidence IEA] IPR000334 Glycoside hydrolase, family 45, IPR036908 RlpA-like domain superfamily PF02015 NA FALSE TRUE
    XANPAGTX0501_001974-T1 2.218747 hypothetical protein AA7 C:(C) Energy production and conversion ENOG503NWXH GO_function: GO:0016491 - oxidoreductase activity [Evidence IEA], GO_function: GO:0050660 - flavin adenine dinucleotide binding [Evidence IEA], GO_function: GO:0071949 - FAD binding [Evidence IEA], GO_process: GO:0055114 - oxidation-reduction process [Evidence IEA] IPR006094 FAD linked oxidase, N-terminal, IPR012951 Berberine/berberine-like, IPR016166 FAD-binding domain, PCMH-type, IPR016167 FAD-binding, type PCMH, subdomain 1, IPR016169 FAD-binding, type PCMH, subdomain 2, IPR036318 FAD-binding, type PCMH-like superfamily PF01565, PF08031 NA TRUE FALSE
    XANPAGTX0501_010307-T1 1.956031 hypothetical protein NA NA NA NA NA NA NA FALSE FALSE
    XANPAGTX0501_003850-T1 1.889111 hypothetical protein NA NA NA NA NA NA NA FALSE FALSE
    XANPAGTX0501_009609-T1 1.620965 hypothetical protein NA I:(I) Lipid transport and metabolism, S:(S) Function unknown ENOG503NZ1V GO_function: GO:0004806 - triglyceride lipase activity [Evidence IEA], GO_process: GO:0016042 - lipid catabolic process [Evidence IEA] IPR000073 Alpha/beta hydrolase fold-1, IPR005152 Lipase, secreted, IPR022742 Serine aminopeptidase, S33, IPR029058 Alpha/Beta hydrolase fold PF03583, PF12146, PF12697 NA TRUE FALSE
    XANPAGTX0501_010442-T1 1.618450 hypothetical protein NA NA NA NA NA NA NA FALSE TRUE
    XANPAGTX0501_005664-T1 1.551288 hypothetical protein GH128 NA NA NA IPR008979 Galactose-binding-like domain superfamily, IPR017853 Glycoside hydrolase superfamily, IPR024655 Asl1-like, glycosyl hydrolase catalytic domain PF11790 NA FALSE FALSE
    XANPAGTX0501_007937-T1 1.503271 hypothetical protein NA NA ENOG503NVDR GO_function: GO:0005515 - protein binding [Evidence IEA] IPR011044 Quinoprotein amine dehydrogenase, beta chain-like, IPR015943 WD40/YVTN repeat-like-containing domain superfamily NA NA FALSE FALSE
    XANPAGTX0501_000401-T1 1.401801 hypothetical protein NA NA NA NA NA NA NA FALSE FALSE
    XANPAGTX0501_007985-T1 1.397612 hypothetical protein NA NA NA GO_function: GO:0008237 - metallopeptidase activity [Evidence IEA] IPR024079 Metallopeptidase, catalytic domain superfamily NA NA FALSE FALSE
    XANPAGTX0501_006091-T1 1.369228 hypothetical protein NA NA ENOG503P8K1 NA NA NA NA FALSE FALSE
    XANPAGTX0501_009962-T1 1.333092 hypothetical protein NA NA NA NA NA NA NA FALSE FALSE
    XANPAGTX0501_007182-T1 1.319524 alpha,alpha-trehalase ath1 NA G:(G) Carbohydrate transport and metabolism ENOG503NUMQ GO_function: GO:0003824 - catalytic activity [Evidence IEA], GO_function: GO:0030246 - carbohydrate binding [Evidence IEA], GO_process: GO:0005975 - carbohydrate metabolic process [Evidence IEA] IPR005195 Glycoside hydrolase, family 65, central catalytic, IPR005196 Glycoside hydrolase, family 65, N-terminal, IPR008928 Six-hairpin glycosidase superfamily, IPR008979 Galactose-binding-like domain superfamily, IPR011013 Galactose mutarotase-like domain superfamily, IPR012341 Six-hairpin glycosidase-like superfamily, IPR037018 Glycoside hydrolase family 65, N-terminal domain superfamily PF03633, PF03636 NA FALSE FALSE
    XANPAGTX0501_010132-T1 1.308632 hypothetical protein GH43 G:(G) Carbohydrate transport and metabolism ENOG503P1F3 GO_function: GO:0004553 - hydrolase activity, hydrolyzing O-glycosyl compounds [Evidence IEA], GO_process: GO:0005975 - carbohydrate metabolic process [Evidence IEA] IPR006710 Glycoside hydrolase, family 43, IPR023296 Glycosyl hydrolase, five-bladed beta-propellor domain superfamily PF04616 NA FALSE FALSE
    XANPAGTX0501_005244-T1 1.304550 hypothetical protein AA3 E:(E) Amino acid transport and metabolism ENOG503NU6B, ENOG503NUFD GO_function: GO:0016614 - oxidoreductase activity, acting on CH-OH group of donors [Evidence IEA], GO_function: GO:0050660 - flavin adenine dinucleotide binding [Evidence IEA], GO_process: GO:0055114 - oxidation-reduction process [Evidence IEA] IPR000172 Glucose-methanol-choline oxidoreductase, N-terminal, IPR007867 Glucose-methanol-choline oxidoreductase, C-terminal, IPR012132 Glucose-methanol-choline oxidoreductase, IPR036188 FAD/NAD(P)-binding domain superfamily PF00732, PF05199 NA K00115 FALSE FALSE
    XANPAGTX0501_007388-T1 1.269375 hypothetical protein NA S:(S) Function unknown ENOG503P3AN GO_function: GO:0005515 - protein binding [Evidence IEA] IPR002372 Pyrrolo-quinoline quinone repeat, IPR011047 Quinoprotein alcohol dehydrogenase-like superfamily, IPR015943 WD40/YVTN repeat-like-containing domain superfamily, IPR018391 Pyrrolo-quinoline quinone beta-propeller repeat NA NA FALSE FALSE
    XANPAGTX0501_006386-T1 1.206100 hypothetical protein NA NA NA NA NA NA NA FALSE FALSE
    XANPAGTX0501_002723-T1 1.203666 hypothetical protein NA S:(S) Function unknown ENOG503NXPR NA IPR039535 Arylsulfotransferase-like PF05935, PF14269 NA FALSE FALSE
    XANPAGTX0501_006694-T1 1.155895 hypothetical protein NA NA ENOG503P24M NA NA NA NA FALSE FALSE
    XANPAGTX0501_010191-T1 1.151281 hypothetical protein CE5 G:(G) Carbohydrate transport and metabolism ENOG503P4Q7 GO_component: GO:0005576 - extracellular region [Evidence IEA], GO_function: GO:0016787 - hydrolase activity [Evidence IEA], GO_function: GO:0050525 - cutinase activity [Evidence IEA] IPR000675 Cutinase/acetylxylan esterase, IPR011150 Cutinase, monofunctional, IPR029058 Alpha/Beta hydrolase fold PF01083 NA FALSE TRUE
    XANPAGTX0501_001051-T1 1.077085 hypothetical protein GH17 G:(G) Carbohydrate transport and metabolism ENOG503NXKT NA IPR017853 Glycoside hydrolase superfamily NA NA FALSE FALSE
    XANPAGTX0501_009124-T1 1.069091 hypothetical protein NA S:(S) Function unknown ENOG503NVXI, ENOG503P4UQ GO_component: GO:0005618 - cell wall [Evidence IEA], GO_function: GO:0005199 - structural constituent of cell wall [Evidence IEA] IPR000420 Yeast PIR protein repeat PF00399 NA FALSE FALSE
    XANPAGTX0501_007482-T1 1.051174 hypothetical protein GH132 S:(S) Function unknown ENOG503NVKV NA IPR005556 SUN family PF03856 NA FALSE FALSE
    XANPAGTX0501_003140-T1 1.035102 hypothetical protein AA3 E:(E) Amino acid transport and metabolism ENOG503PBWU GO_function: GO:0016614 - oxidoreductase activity, acting on CH-OH group of donors [Evidence IEA], GO_function: GO:0050660 - flavin adenine dinucleotide binding [Evidence IEA], GO_process: GO:0055114 - oxidation-reduction process [Evidence IEA] IPR000172 Glucose-methanol-choline oxidoreductase, N-terminal, IPR007867 Glucose-methanol-choline oxidoreductase, C-terminal, IPR012132 Glucose-methanol-choline oxidoreductase, IPR036188 FAD/NAD(P)-binding domain superfamily PF00732, PF05199 NA TRUE FALSE
    XANPAGTX0501_009902-T1 1.034038 hypothetical protein NA P:(P) Inorganic ion transport and metabolism ENOG503NY8E NA IPR001148 Alpha carbonic anhydrase domain, IPR036398 Alpha carbonic anhydrase domain superfamily, IPR041891 Carbonic anhydrase, prokaryotic-like PF00194 NA K01672 FALSE FALSE
    XANPAGTX0501_010393-T1 1.019087 hypothetical protein NA NA NA NA NA NA NA FALSE FALSE

    4.9. Putative NLRs and other self/non-self recognition proteins

    Putative NLRs

    • Got the list of putative NLRs
    nlr_list<-read.delim2("../../02_long_read_assemblies/analysis_and_temp_files/10_nlr/NLRs.txt")
    • Four upregulated in lichen, one in cluture
    nlr_sig<-sig %>% filter(target_id %in% nlr_list$TranscriptID)
    nlr_sig %>% group_by(change) %>% summarise(n=n())
    ## # A tibble: 3 × 2
    ##   change        n
    ##   <chr>     <int>
    ## 1 culture       1
    ## 2 lichen        4
    ## 3 low_logFC     9
    • For all lichen-upregulated, we can see they are expressed in culture too
    nlr_lichen<-nlr_sig$target_id[nlr_sig$change=="lichen"]
    plot_bootstrap(so, 
                   target_id = nlr_lichen[1], 
                   units = "est_counts", 
                   color_by = "condition")
    plot_bootstrap(so, 
                   target_id = nlr_lichen[2], 
                   units = "est_counts", 
                   color_by = "condition")
    plot_bootstrap(so, 
                   target_id = nlr_lichen[3], 
                   units = "est_counts", 
                   color_by = "condition")
    plot_bootstrap(so, 
                   target_id = nlr_lichen[4], 
                   units = "est_counts", 
                   color_by = "condition")

    • What kind of NLRs are they?
      • XANPAGTX0501_005494-T1: NACHT + Ankyrin domains
      • XANPAGTX0501_002585-T1: HeLo + NACHT + Ankyrin domains **This is promising: “The HeLo domain of HET-S represents the cell death execution domain in the het-s/het-S system” quote from Daskalov et al. 2012
      • XANPAGTX0501_005102-T1: Nucleoside Phosphotase + NACHT + Ankyrin domains
      • XANPAGTX0501_010687-T1: NACHT + Ankyrin domains
    • The one that is culture-upregulated, is inconsistent and is highly-expressed in most lichen samples too
    plot_bootstrap(so, 
                   target_id = nlr_sig$target_id[nlr_sig$change=="culture"], 
                   units = "est_counts", 
                   color_by = "condition")

    • The protein is NB-ARC + TRP

    Broader set of NLR-like proteins

    • Got the list of NLR-like proteins, including those without a complete set of domains
    nlr_like<-read.delim2("../../02_long_read_assemblies/analysis_and_temp_files/10_nlr/NLR_like.txt")
    • 12 upregulated in lichen, 3 in cluture
    nlrlike_sig<-sig %>% filter(target_id %in% nlr_like$TranscriptID) %>% arrange(b)
    nlrlike_sig %>% group_by(change) %>% summarise(n=n())
    ## # A tibble: 3 × 2
    ##   change        n
    ##   <chr>     <int>
    ## 1 culture       3
    ## 2 lichen       12
    ## 3 low_logFC    14
    • Some lichen-upregulated are very strongly upregulated
      • XANPAGTX0501_007753-T1 is not expressed in culture but strongly expressed in nearly all lichen samples. It has one P-loop domain
      • XANPAGTX0501_010570-T1 is not expressed in culture but strongly expressed in nearly all lichen samples. It has P-loop domain + TPR
      • XANPAGTX0501_002436-T1 is expressed in both, but clearly is lichen-upregulated. It has STAND + NACHT domains (STAND function is unknown)
    plot_bootstrap(so, 
                   target_id = nlrlike_sig$target_id[1], 
                   units = "est_counts", 
                   color_by = "condition")
    plot_bootstrap(so, 
                   target_id = nlrlike_sig$target_id[2], 
                   units = "est_counts", 
                   color_by = "condition")
    plot_bootstrap(so, 
                   target_id = nlrlike_sig$target_id[3], 
                   units = "est_counts", 
                   color_by = "condition")

    Other potential self/non-self recognition proteins

    • In total, the genome has 73 proteins with a HET or HaLo domain. Of them, 16 are from lichen-enriched orthogroups
    funannot2 %>% filter(grepl("IPR038305",InterPro_new) | 
                           grepl("IPR029498",InterPro_new) | 
                           grepl("IPR010730",InterPro_new)) %>% 
      group_by(lichen_ortho) %>% summarize(n=n())
    ## # A tibble: 2 × 2
    ##   lichen_ortho     n
    ##   <lgl>        <int>
    ## 1 FALSE           57
    ## 2 TRUE            16
    • Lichen and culture both have some upregulated
    het_sig<-funannot2 %>% left_join(sig,by=c("TranscriptID"="target_id")) %>%
      filter(grepl("IPR038305",InterPro_new) | 
                           grepl("IPR029498",InterPro_new) | 
                           grepl("IPR010730",InterPro_new)) %>% arrange(b)
    het_sig %>% group_by(change) %>% summarize(n=n())
    ## # A tibble: 4 × 2
    ##   change        n
    ##   <chr>     <int>
    ## 1 culture       6
    ## 2 lichen       14
    ## 3 low_logFC    24
    ## 4 <NA>         29
    • Here only showing those proteins not discussed above as NLRs or NLR-like
    het_sig2<-het_sig %>% filter(!(TranscriptID %in% nlrlike_sig$target_id),!is.na(b)) 
    het_sig2 %>% group_by(change) %>% summarize(n=n())
    ## # A tibble: 3 × 2
    ##   change        n
    ##   <chr>     <int>
    ## 1 culture       5
    ## 2 lichen       10
    ## 3 low_logFC    20
    • Visualize most lichen-upregulated
      • Top two have only HET domain
      • The third one has HeLo domains and protein kinase domains
    plot_bootstrap(so, 
                   target_id = het_sig2$TranscriptID[1], 
                   units = "est_counts", 
                   color_by = "condition")
    plot_bootstrap(so, 
                   target_id = het_sig2$TranscriptID[2], 
                   units = "est_counts", 
                   color_by = "condition")
    plot_bootstrap(so, 
                   target_id = het_sig2$TranscriptID[3], 
                   units = "est_counts", 
                   color_by = "condition")

    * Visualize most culture-upregulated * For first and third, expression in lichens is very non-uniform * Second looks more regular

    plot_bootstrap(so, 
                   target_id = het_sig2$TranscriptID[nrow(het_sig2)], 
                   units = "est_counts", 
                   color_by = "condition")
    plot_bootstrap(so, 
                   target_id = het_sig2$TranscriptID[nrow(het_sig2)-1], 
                   units = "est_counts", 
                   color_by = "condition")
    plot_bootstrap(so, 
                   target_id = het_sig2$TranscriptID[nrow(het_sig2)-2], 
                   units = "est_counts", 
                   color_by = "condition")

    Protein ubiquitination

    • In addition to already pictured XANPAGTX0501_001643-T1, I wanted to visualize other F-box proteins - primarily to check that they are upregulated among all lichen samples, not just apothecia
      • All seem good, inluding those with the b value most close to the threshold
    fbox<-sig2 %>% filter(grepl("F-box",InterPro_new),change!="low_logFC")
    plot_bootstrap(so, 
                   target_id = fbox$target_id[10], 
                   units = "est_counts", 
                   color_by = "condition")
    plot_bootstrap(so, 
                   target_id = fbox$target_id[9], 
                   units = "est_counts", 
                   color_by = "condition")
    plot_bootstrap(so, 
                   target_id = fbox$target_id[4], 
                   units = "est_counts", 
                   color_by = "condition")
    plot_bootstrap(so, 
                   target_id = fbox$target_id[5], 
                   units = "est_counts", 
                   color_by = "condition")
    
    poz<-sig2 %>% filter(grepl("BTB/POZ",InterPro_new),change!="low_logFC")
    plot_bootstrap(so, 
                   target_id = poz$target_id[3], 
                   units = "est_counts", 
                   color_by = "condition")

    RNAi and RNA binding

    • Similarly, wanted to check that they are upregulated among all lichen samples, not just apothecia
      • again, all is good
    plot_bootstrap(so, 
                   target_id = "XANPAGTX0501_002123-T1", 
                   units = "est_counts", 
                   color_by = "condition")
    plot_bootstrap(so, 
                   target_id = "XANPAGTX0501_003775-T1", 
                   units = "est_counts", 
                   color_by = "condition")
    plot_bootstrap(so, 
                   target_id = "XANPAGTX0501_008184-T1", 
                   units = "est_counts", 
                   color_by = "condition")

    Apothecia-upregulated transporter

    • wanted to check thow looks the profile of the inositol and sugar transporters that is upregulated in apothecia, but not in lichen compared to culture. The stats look legit ( althought the degree of variation for the inositol transporter in the culture is peculiar)
    plot_bootstrap(so, 
                   target_id = "XANPAGTX0501_001825-T1", 
                   units = "est_counts", 
                   color_by = "condition")
    plot_bootstrap(so, 
                   target_id = "XANPAGTX0501_009737-T1", 
                   units = "est_counts", 
                   color_by = "condition")

    5. Preparing visuals

    Big heatmap, grouped by function

    • Since Complexheatmap didn’t allow me to have multile vertical panels united by one top annotation with single clustering, had to make two heatmapes (one split into panels and one whole) and combine manually in AI
    • Draw whole heatmap, which will be used for the top annotation and clustering
    #make color mapping consistent between different heatmaps
    library(circlize)
    library(extrafont)
    col_fun <- colorRamp2(seq(0,8,length.out = 100), viridis(100))
    
    filt_mat<-subset(trans_mat, rownames(trans_mat) %in% annotation$TranscriptID)
    HM_all = Heatmap(filt_mat, show_row_names = F, show_column_names = T, row_title_rot = 0, col=col_fun,top_annotation = ta,heatmap_legend_param = list(title = "Expression: log(TPM)"))
    
    #get the column order
    colord<-column_order(HM_all)
    ## Warning: The heatmap has not been initialized. You might have different results
    ## if you repeatedly execute this function, e.g. when row_km/column_km was
    ## set. It is more suggested to do as `ht = draw(ht); column_order(ht)`.
    colord<-colnames(filt_mat)[colord]
    
    draw(HM_all)
    
    pdf(file="../results/big_heatmap_unseparated.pdf",width=6,height=6)
    draw(HM_all)
    dev.off()
    ## quartz_off_screen 
    ##                 2
    • Draw two halves of the actual heatmaps
    #define function
    draw_heatmap_by_group<-function(gene_group,annotation_table = annotation_temp,matrix = trans_mat, order = colord){
      list<-annotation_table$TranscriptID[annotation_table$Function==gene_group]
      filt_mat<-subset(matrix, rownames(matrix) %in% list)
      HM = Heatmap(filt_mat, show_row_names = F, show_column_names = T, row_title = gene_group, row_title_rot = 0, col=col_fun,  show_heatmap_legend = FALSE,
        cluster_columns = FALSE,column_order = order,
           column_names_gp = gpar(fontsize = 6, font="Arial"),
        row_title_gp = gpar(fontsize = 7, font="Arial"))
      return(HM)
    }
    
    
    #make a list of all gene groups in the right order
    annotation_temp<-annotation %>% left_join(mult_list %>% select(Function, Function_type) %>% unique()) %>% arrange(Function)
    annotation_temp$Function_type<-factor(annotation_temp$Function_type,
      levels=c("Fungal multicellularity", "Expression regulation","Signal transduction"),ordered = T)
    annotation_temp<-annotation_temp %>% arrange(Function_type)
    
    #rename some of the functions to make shorter
    annotation_temp$Function[annotation_temp$Function=="Acetyl-CoA production and metabolism"]<-"Acetyl-CoA production\nand metabolism"
    annotation_temp$Function[annotation_temp$Function=="Cell division, proliferation and growth"]<-"Cell division and growth"
    annotation_temp$Function[annotation_temp$Function=="Cell surface and cell wall proteins"]<-"Cell wall proteins"
    annotation_temp$Function[annotation_temp$Function=="Cell surface and cell wall proteins"]<-"Cell wall proteins"
    
    #apply to all gene groups
    l<-lapply(as.vector(unique(annotation_temp$Function)),draw_heatmap_by_group)
    ## Warning in validGP(list(...)): NAs introduced by coercion
    
    ## Warning in validGP(list(...)): NAs introduced by coercion
    
    ## Warning in validGP(list(...)): NAs introduced by coercion
    
    ## Warning in validGP(list(...)): NAs introduced by coercion
    
    ## Warning in validGP(list(...)): NAs introduced by coercion
    
    ## Warning in validGP(list(...)): NAs introduced by coercion
    
    ## Warning in validGP(list(...)): NAs introduced by coercion
    
    ## Warning in validGP(list(...)): NAs introduced by coercion
    
    ## Warning in validGP(list(...)): NAs introduced by coercion
    
    ## Warning in validGP(list(...)): NAs introduced by coercion
    
    ## Warning in validGP(list(...)): NAs introduced by coercion
    
    ## Warning in validGP(list(...)): NAs introduced by coercion
    
    ## Warning in validGP(list(...)): NAs introduced by coercion
    
    ## Warning in validGP(list(...)): NAs introduced by coercion
    
    ## Warning in validGP(list(...)): NAs introduced by coercion
    
    ## Warning in validGP(list(...)): NAs introduced by coercion
    
    ## Warning in validGP(list(...)): NAs introduced by coercion
    
    ## Warning in validGP(list(...)): NAs introduced by coercion
    
    ## Warning in validGP(list(...)): NAs introduced by coercion
    
    ## Warning in validGP(list(...)): NAs introduced by coercion
    
    ## Warning in validGP(list(...)): NAs introduced by coercion
    
    ## Warning in validGP(list(...)): NAs introduced by coercion
    
    ## Warning in validGP(list(...)): NAs introduced by coercion
    
    ## Warning in validGP(list(...)): NAs introduced by coercion
    
    ## Warning in validGP(list(...)): NAs introduced by coercion
    
    ## Warning in validGP(list(...)): NAs introduced by coercion
    
    ## Warning in validGP(list(...)): NAs introduced by coercion
    
    ## Warning in validGP(list(...)): NAs introduced by coercion
    
    ## Warning in validGP(list(...)): NAs introduced by coercion
    
    ## Warning in validGP(list(...)): NAs introduced by coercion
    
    ## Warning in validGP(list(...)): NAs introduced by coercion
    
    ## Warning in validGP(list(...)): NAs introduced by coercion
    
    ## Warning in validGP(list(...)): NAs introduced by coercion
    
    ## Warning in validGP(list(...)): NAs introduced by coercion
    
    ## Warning in validGP(list(...)): NAs introduced by coercion
    
    ## Warning in validGP(list(...)): NAs introduced by coercion
    
    ## Warning in validGP(list(...)): NAs introduced by coercion
    
    ## Warning in validGP(list(...)): NAs introduced by coercion
    
    ## Warning in validGP(list(...)): NAs introduced by coercion
    
    ## Warning in validGP(list(...)): NAs introduced by coercion
    
    ## Warning in validGP(list(...)): NAs introduced by coercion
    
    ## Warning in validGP(list(...)): NAs introduced by coercion
    #add other gene groups, which aren't in the multicellularity list
    #protein kinases
    kinase_list<-funannot$TranscriptID[grepl("IPR000719",funannot$InterPro_new) & funannot$TranscriptID %in% sig$target_id[sig$change %in% c("lichen","culture")]]
    filt_mat_kinase<-subset(trans_mat, rownames(trans_mat) %in% kinase_list)
    HM_kinase = Heatmap(filt_mat_kinase, show_row_names = F, show_column_names = T, row_title = "Protein Kinases", row_title_rot = 0, col=col_fun, show_heatmap_legend = FALSE,
           cluster_columns = FALSE,column_order = colord,
           column_names_gp = gpar(fontsize = 6, font="Arial"),
           row_title_gp = gpar(fontsize = 7, font="Arial"))
    ## Warning in validGP(list(...)): NAs introduced by coercion
    
    ## Warning in validGP(list(...)): NAs introduced by coercion
    #NLRs
    nlr_list<-nlr_sig$target_id[nlr_sig$change %in% c("lichen","culture")]
    filt_mat_nlr<-subset(trans_mat, rownames(trans_mat) %in% nlr_list)
    HM_nlr = Heatmap(filt_mat_nlr, show_row_names = F, show_column_names = T, row_title = "Putative NLRs", row_title_rot = 0, col=col_fun, show_heatmap_legend = FALSE,
           cluster_columns = FALSE,column_order = colord,
           column_names_gp = gpar(fontsize = 6, font="Arial"),
           row_title_gp = gpar(fontsize = 7, font="Arial"))
    ## Warning in validGP(list(...)): NAs introduced by coercion
    
    ## Warning in validGP(list(...)): NAs introduced by coercion
    #Other het-incopatibility
    het_list<-het_sig$TranscriptID[het_sig$change %in% c("lichen","culture") & !(het_sig$TranscriptID %in% nlr_list)]
    filt_mat_het<-subset(trans_mat, rownames(trans_mat) %in% het_list)
    HM_het = Heatmap(filt_mat_het, show_row_names = F, show_column_names = T, row_title = "Other self/non-self\nrecognition", row_title_rot = 0, col=col_fun, show_heatmap_legend = FALSE,
           cluster_columns = FALSE,column_order = colord,
           column_names_gp = gpar(fontsize = 6, font="Arial"),
           row_title_gp = gpar(fontsize = 7, font="Arial"))
    ## Warning in validGP(list(...)): NAs introduced by coercion
    
    ## Warning in validGP(list(...)): NAs introduced by coercion
    #replace 4 panels for signaling pathways with one 
    signaling_list<-annotation$TranscriptID[annotation$Function %in% c("G-proteins and GPCR", "MAPK signaling pathway","Oxylipins","Velvet")]
    filt_mat_signal<-subset(trans_mat, rownames(trans_mat) %in% signaling_list)
    HM_signal = Heatmap(filt_mat_signal, show_row_names = F, show_column_names = T, row_title = "Signal transduction", row_title_rot = 0, col=col_fun, show_heatmap_legend = FALSE,
           cluster_columns = FALSE,column_order = colord,
           column_names_gp = gpar(fontsize = 6, font="Arial"),
           row_title_gp = gpar(fontsize = 7, font="Arial"))
    ## Warning in validGP(list(...)): NAs introduced by coercion
    
    ## Warning in validGP(list(...)): NAs introduced by coercion
    #draw
    ht_list <- l[[4]] %v% l[[5]] %v% l[[7]] %v% l[[6]] %v% l[[3]] %v% l[[8]] %v% 
      l[[20]] %v% l[[14]] %v% l[[1]] %v% l[[11]] %v% l[[2]] %v% l[[15]] %v% l[[16]] %v% l[[19]]  %v% HM_signal %v% HM_kinase %v% HM_nlr %v% HM_het
    
    #draw in two parts
    ht_list1 <- l[[4]] %v% l[[5]] %v% l[[7]] %v% l[[6]] %v% l[[3]] %v% l[[8]] %v% 
      l[[20]] %v% l[[14]] %v% l[[1]] %v% l[[11]] %v% l[[2]] 
    
    ht_list2 <- l[[15]] %v% l[[16]] %v% l[[19]]  %v% HM_signal %v% HM_kinase %v% HM_nlr %v% HM_het
    
    draw(ht_list1)
    • Export the heat map
    pdf(file="../results/big_heatmap_part1.pdf",width=4.25,height=4.5)
    draw(ht_list1, newpage = FALSE)
    dev.off()
    ## quartz_off_screen 
    ##                 2
    pdf(file="../results/big_heatmap_part2.pdf",width=4,height=3.5)
    draw(ht_list2)
    dev.off()
    ## quartz_off_screen 
    ##                 2
    • Parietin gene cluster
    parietin_list<-c("XANPAGTX0501_008850-T1","XANPAGTX0501_008851-T1","XANPAGTX0501_008852-T1","XANPAGTX0501_008853-T1")
    
    filt_mat_parietin<-subset(trans_mat, rownames(trans_mat) %in% parietin_list)
    HM_parietin = Heatmap(filt_mat_parietin, show_row_names = T, show_column_names = T,cluster_rows = FALSE, row_title_rot = 0, col=viridis(100), heatmap_legend_param = list(title = "Expression: log(TPM)"), top_annotation = ta)
    
    pdf(file="../results/parietin_heatmap.pdf",width=10,height=2.5)
    draw(HM_parietin)
    dev.off()
    ## quartz_off_screen 
    ##                 2
    HM_parietin
    • Make new heat map for all DGE secreted proteins
    secreted_list<-secreted_sig$target_id[secreted_sig$change!="low_logFC"]
    filt_mat_secreted<-subset(trans_mat, rownames(trans_mat) %in% secreted_list)
    HM_secreted = Heatmap(filt_mat_secreted, show_row_names = F, show_column_names = T,cluster_rows = T, row_title_rot = 0, col=viridis(100), heatmap_legend_param = list(title = "Expression: log(TPM)"), top_annotation = ta)
    
    pdf(file="../results/secreted_dge_heatmap.pdf",width=10,height=8)
    draw(HM_secreted)
    dev.off()
    ## quartz_off_screen 
    ##                 2
    HM_secreted
    • Make box plot for the putative effecotr XANPAGTX0501_009887-T1
    plot_bootstrap(so, 
                   target_id = "XANPAGTX0501_009887-T1", 
                   units = "est_counts", 
                   color_by = "condition")

    * Make heat map for the same effector

    filt_mat_eff<-subset(trans_mat, rownames(trans_mat) =="XANPAGTX0501_009887-T1")
    HM_eff = Heatmap(filt_mat_eff, show_row_names = F, show_column_names = T,cluster_rows = T, row_title_rot = 0, col=viridis(100), heatmap_legend_param = list(title = "Expression: log(TPM)"), top_annotation = ta)
    
    pdf(file="../results/XANPAGTX0501_009887-T1_heatmap.pdf",width=10,height=2)
    draw(HM_eff)
    dev.off()
    ## quartz_off_screen 
    ##                 2
    HM_eff
    • Make heatmap for all core biosynthetic genes (regardless of their DGE status)
      • Added manually DGE status in Ai
    antismash_core<-antismash_gene_all %>% filter(Core_gene==T) %>%
      left_join(funannot2,relationship = "many-to-many") %>% 
      left_join(sig, by = c("TranscriptID"="target_id")) 
    
    
    filt_mat_core<-subset(trans_mat, rownames(trans_mat) %in% antismash_core$TranscriptID)
    
    #define row annotation
    ##for one protein, XANPAGTX0501_001694-T1, we had two annotations since it is a core gene in two cluster: Xp_GTX0501_2_Cluster_6 and Xp_GTX0501_2_Cluster_7. here I picked Xp_GTX0501_2_Cluster_7, since Xp_GTX0501_2_Cluster_6 had two core genes
    ra<-data.frame("TranscriptID"=rownames(filt_mat_core)) %>% left_join(antismash_core) %>%
      filter(TranscriptID!="XANPAGTX0501_001694-T1"|Cluster!="Xp_GTX0501_2_Cluster_6") %>%
      select(Description)
    ra_colors = c("fungal-RiPP-like"= brewer.pal(8,"Dark2")[1], "NRP-metallophore"= brewer.pal(8,"Dark2")[4], "NRPS"= brewer.pal(8,"Dark2")[3],
           "NRPS-like"= brewer.pal(8,"Dark2")[5], "NRPS,T1PKS"= brewer.pal(8,"Dark2")[7], "T1PKS"= brewer.pal(8,"Dark2")[6], 
               "T3PKS" = brewer.pal(8,"Dark2")[8], "terpene"= brewer.pal(8,"Dark2")[2])
    ra = rowAnnotation(df = ra,col = list(Description = ra_colors))
    
    HM_core = Heatmap(filt_mat_core, show_row_names = F, show_column_names = T,
                      row_title_rot = 0, col=col_fun, show_heatmap_legend = FALSE,
          cluster_columns = FALSE,column_order = colord,
           column_names_gp = gpar(fontsize = 6, font="Arial"),
           row_title_gp = gpar(fontsize = 7, font="Arial"),
          top_annotation = ta,
          right_annotation = ra)
    ## Warning in validGP(list(...)): NAs introduced by coercion
    
    ## Warning in validGP(list(...)): NAs introduced by coercion
    pdf(file="../results/all_core_SM_heatmap.pdf",width=8,height=8)
    draw(HM_core)
    dev.off()
    ## quartz_off_screen 
    ##                 2
    #get order of the rows in the heatmap and match to b values
    rord = row_order(HM_core)
    ## Warning: The heatmap has not been initialized. You might have different results
    ## if you repeatedly execute this function, e.g. when row_km/column_km was
    ## set. It is more suggested to do as `ht = draw(ht); row_order(ht)`.
    rord<-rownames(filt_mat_core)[rord]
    core_dge<-data.frame("TranscriptID"=rord) %>% left_join(antismash_core) %>%
      filter(TranscriptID!="XANPAGTX0501_001694-T1"|Cluster!="Xp_GTX0501_2_Cluster_6") %>%
      select(TranscriptID,b,Description)
    head(core_dge)
    ##             TranscriptID         b      Description
    ## 1 XANPAGTX0501_001972-T1  2.363522             NRPS
    ## 2 XANPAGTX0501_001973-T1  2.358272            T1PKS
    ## 3 XANPAGTX0501_009715-T1  2.760824 fungal-RiPP-like
    ## 4 XANPAGTX0501_009716-T1  2.958263 fungal-RiPP-like
    ## 5 XANPAGTX0501_009866-T1        NA          terpene
    ## 6 XANPAGTX0501_001064-T1 -1.531609 fungal-RiPP-like
    • Make a heat map for putative effectors
      • Include all genes from the ‘cluster of interest’ form the clustering of secretome based on the structures
    cl_int<-read.delim2("../../10_lichen_effectors/analysis_and_temp_files/05_cluster_structures/clustering_results.txt") %>% filter(ClusterID %in% c("cl04","cl16","cl18","cl19","cl20","cl21","cl22","cl23",
                          "cl24","cl24a","cl37","cl38","cl40","cl41","cl42","cl80",
                          "cl29","cl34"),Gene_Expression!="Non-DGE")
    filt_mat_int<-subset(trans_mat, rownames(trans_mat) %in% cl_int$ProteinID)
    
    #define row annotation
    ra2<-data.frame("ProteinID"=rownames(filt_mat_int)) %>% left_join(cl_int) %>%
      select(ClusterID)
    
    #define color to match the tree
    col_cl<-read.delim2("../../10_lichen_effectors/analysis_and_temp_files/05_cluster_structures/cluster_col.txt",sep=",") %>% filter(cluster_id %in% cl_int$ClusterID)
    ra2_colors<-as.character(col_cl$color)
    names(ra2_colors)<-as.character(col_cl$cluster_id)
    ra2 = rowAnnotation(df = ra2,col = list(ClusterID = ra2_colors),show_legend = F)
    ta2 = HeatmapAnnotation(df=s2c,col = list(condition = ta_colors),show_legend = F)
    
    HM_int = Heatmap(filt_mat_int, show_row_names = T, show_column_names = T,
                      row_title_rot = 0, col=col_fun, show_heatmap_legend = F,
          cluster_columns = FALSE,column_order = colord,
          top_annotation = ta2,
          right_annotation = ra2,
          column_names_gp = gpar(fontsize = 6),
           row_names_gp = gpar(fontsize = 6))
    
    pdf(file="../results/all_secretome_int_heatmap.pdf",width=4,height=4)
    draw(HM_int)
    dev.off()
    ## quartz_off_screen 
    ##                 2
    #get order of rows to match to clusters
    rord = row_order(HM_int)
    ## Warning: The heatmap has not been initialized. You might have different results
    ## if you repeatedly execute this function, e.g. when row_km/column_km was
    ## set. It is more suggested to do as `ht = draw(ht); row_order(ht)`.
    rord<-rownames(filt_mat_int)[rord]
    rord2<-data.frame("ProteinID"=rord) %>% left_join(cl_int)
    
    #get number of DGE per cluster of interest
    t<-read.delim2("../../10_lichen_effectors/analysis_and_temp_files/05_cluster_structures/clustering_results.txt") %>% filter(ClusterID %in% c("cl04","cl16","cl18","cl19","cl20","cl21","cl22","cl23",
                          "cl24","cl24a","cl37","cl38","cl40","cl41","cl42","cl80",
                          "cl29","cl34")) 
    
    t%>% group_by(ClusterID,Gene_Expression) %>% summarise(n=n()) %>%
      pivot_wider(names_from = Gene_Expression,values_from = n,values_fill=0)
    ## # A tibble: 18 × 3
    ## # Groups:   ClusterID [18]
    ##    ClusterID `Non-DGE` `Upregulated in lichen`
    ##    <chr>         <int>                   <int>
    ##  1 cl04              6                       2
    ##  2 cl16              2                       0
    ##  3 cl18              2                       4
    ##  4 cl19              3                       1
    ##  5 cl20              7                       0
    ##  6 cl21              4                       1
    ##  7 cl22              0                       2
    ##  8 cl23              2                       1
    ##  9 cl24              5                       4
    ## 10 cl24a             6                       5
    ## 11 cl29              3                       1
    ## 12 cl34              3                       1
    ## 13 cl37              1                       1
    ## 14 cl38              3                       1
    ## 15 cl40              1                       1
    ## 16 cl41              0                       3
    ## 17 cl42              0                       1
    ## 18 cl80              1                       1